commit f2947c14f5
112  .github/workflows/jan-electron-linter-and-test.yml  vendored
@ -57,19 +57,19 @@ jobs:
|
||||
rm -rf ~/jan
|
||||
make clean
|
||||
|
||||
# - name: Get Commit Message for PR
|
||||
# if : github.event_name == 'pull_request'
|
||||
# run: |
|
||||
# echo "REPORT_PORTAL_DESCRIPTION=${{github.event.after}})" >> $GITHUB_ENV
|
||||
- name: Get Commit Message for PR
|
||||
if : github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "REPORT_PORTAL_DESCRIPTION=${{github.event.after}})" >> $GITHUB_ENV
|
||||
|
||||
# - name: Get Commit Message for push event
|
||||
# if : github.event_name == 'push'
|
||||
# run: |
|
||||
# echo "REPORT_PORTAL_DESCRIPTION=${{github.sha}})" >> $GITHUB_ENV
|
||||
- name: Get Commit Message for push event
|
||||
if : github.event_name == 'push'
|
||||
run: |
|
||||
echo "REPORT_PORTAL_DESCRIPTION=${{github.sha}})" >> $GITHUB_ENV
|
||||
|
||||
# - name: "Config report portal"
|
||||
# run: |
|
||||
# make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App macos" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
|
||||
- name: "Config report portal"
|
||||
run: |
|
||||
make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App macos" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
|
||||
|
||||
- name: Linter and test
|
||||
run: |
|
||||
@ -78,9 +78,9 @@ jobs:
|
||||
make test
|
||||
env:
|
||||
CSC_IDENTITY_AUTO_DISCOVERY: "false"
|
||||
# TURBO_API: "${{ secrets.TURBO_API }}"
|
||||
# TURBO_TEAM: "macos"
|
||||
# TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
|
||||
TURBO_API: "${{ secrets.TURBO_API }}"
|
||||
TURBO_TEAM: "macos"
|
||||
TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
|
||||
|
||||
test-on-macos-pr-target:
|
||||
if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository
|
||||
@ -141,16 +141,16 @@ jobs:
|
||||
}
|
||||
make clean
|
||||
|
||||
# - name: Get Commit Message for push event
|
||||
# if : github.event_name == 'push'
|
||||
# shell: bash
|
||||
# run: |
|
||||
# echo "REPORT_PORTAL_DESCRIPTION=${{github.sha}}" >> $GITHUB_ENV
|
||||
- name: Get Commit Message for push event
|
||||
if : github.event_name == 'push'
|
||||
shell: bash
|
||||
run: |
|
||||
echo "REPORT_PORTAL_DESCRIPTION=${{github.sha}}" >> $GITHUB_ENV
|
||||
|
||||
# - name: "Config report portal"
|
||||
# shell: bash
|
||||
# run: |
|
||||
# make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App Windows ${{ matrix.antivirus-tools }}" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
|
||||
- name: "Config report portal"
|
||||
shell: bash
|
||||
run: |
|
||||
make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App Windows ${{ matrix.antivirus-tools }}" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
|
||||
|
||||
- name: Linter and test
|
||||
shell: powershell
|
||||
@ -158,10 +158,10 @@ jobs:
|
||||
npm config set registry ${{ secrets.NPM_PROXY }} --global
|
||||
yarn config set registry ${{ secrets.NPM_PROXY }} --global
|
||||
make test
|
||||
# env:
|
||||
# TURBO_API: "${{ secrets.TURBO_API }}"
|
||||
# TURBO_TEAM: "windows"
|
||||
# TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
|
||||
env:
|
||||
TURBO_API: "${{ secrets.TURBO_API }}"
|
||||
TURBO_TEAM: "windows"
|
||||
TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
|
||||
test-on-windows-pr:
|
||||
if: (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository)
|
||||
runs-on: windows-desktop-default-windows-security
|
||||
@ -189,16 +189,16 @@ jobs:
|
||||
}
|
||||
make clean
|
||||
|
||||
# - name: Get Commit Message for PR
|
||||
# if : github.event_name == 'pull_request'
|
||||
# shell: bash
|
||||
# run: |
|
||||
# echo "REPORT_PORTAL_DESCRIPTION=${{github.event.after}}" >> $GITHUB_ENV
|
||||
- name: Get Commit Message for PR
|
||||
if : github.event_name == 'pull_request'
|
||||
shell: bash
|
||||
run: |
|
||||
echo "REPORT_PORTAL_DESCRIPTION=${{github.event.after}}" >> $GITHUB_ENV
|
||||
|
||||
# - name: "Config report portal"
|
||||
# shell: bash
|
||||
# run: |
|
||||
# make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App Windows" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
|
||||
- name: "Config report portal"
|
||||
shell: bash
|
||||
run: |
|
||||
make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App Windows" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
|
||||
|
||||
- name: Linter and test
|
||||
shell: powershell
|
||||
@ -206,10 +206,10 @@ jobs:
|
||||
npm config set registry ${{ secrets.NPM_PROXY }} --global
|
||||
yarn config set registry ${{ secrets.NPM_PROXY }} --global
|
||||
make test
|
||||
# env:
|
||||
# TURBO_API: "${{ secrets.TURBO_API }}"
|
||||
# TURBO_TEAM: "windows"
|
||||
# TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
|
||||
env:
|
||||
TURBO_API: "${{ secrets.TURBO_API }}"
|
||||
TURBO_TEAM: "windows"
|
||||
TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
|
||||
|
||||
test-on-windows-pr-target:
|
||||
if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository
|
||||
@ -266,20 +266,20 @@ jobs:
|
||||
rm -rf ~/jan
|
||||
make clean
|
||||
|
||||
# - name: Get Commit Message for PR
|
||||
# if : github.event_name == 'pull_request'
|
||||
# run: |
|
||||
# echo "REPORT_PORTAL_DESCRIPTION=${{github.event.after}}" >> $GITHUB_ENV
|
||||
- name: Get Commit Message for PR
|
||||
if : github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "REPORT_PORTAL_DESCRIPTION=${{github.event.after}}" >> $GITHUB_ENV
|
||||
|
||||
# - name: Get Commit Message for push event
|
||||
# if : github.event_name == 'push'
|
||||
# run: |
|
||||
# echo "REPORT_PORTAL_DESCRIPTION=${{github.sha}}" >> $GITHUB_ENV
|
||||
- name: Get Commit Message for push event
|
||||
if : github.event_name == 'push'
|
||||
run: |
|
||||
echo "REPORT_PORTAL_DESCRIPTION=${{github.sha}}" >> $GITHUB_ENV
|
||||
|
||||
# - name: "Config report portal"
|
||||
# shell: bash
|
||||
# run: |
|
||||
# make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App Linux" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
|
||||
- name: "Config report portal"
|
||||
shell: bash
|
||||
run: |
|
||||
make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App Linux" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
|
||||
|
||||
- name: Linter and test
|
||||
run: |
|
||||
@ -288,10 +288,10 @@ jobs:
|
||||
npm config set registry ${{ secrets.NPM_PROXY }} --global
|
||||
yarn config set registry ${{ secrets.NPM_PROXY }} --global
|
||||
make test
|
||||
# env:
|
||||
# TURBO_API: "${{ secrets.TURBO_API }}"
|
||||
# TURBO_TEAM: "linux"
|
||||
# TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
|
||||
env:
|
||||
TURBO_API: "${{ secrets.TURBO_API }}"
|
||||
TURBO_TEAM: "linux"
|
||||
TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
|
||||
|
||||
test-on-ubuntu-pr-target:
|
||||
runs-on: [self-hosted, Linux, ubuntu-desktop]
|
||||
|
||||
44  .github/workflows/template-build-macos-arm64.yml  vendored
@ -41,7 +41,7 @@ on:
|
||||
|
||||
jobs:
|
||||
build-macos:
|
||||
runs-on: macos-silicon
|
||||
runs-on: macos-latest
|
||||
environment: production
|
||||
permissions:
|
||||
contents: write
|
||||
@ -55,15 +55,9 @@ jobs:
|
||||
uses: actions/setup-node@v1
|
||||
with:
|
||||
node-version: 20
|
||||
- name: Unblock keychain
|
||||
run: |
|
||||
security unlock-keychain -p ${{ secrets.KEYCHAIN_PASSWORD }} ~/Library/Keychains/login.keychain-db
|
||||
# - uses: actions/setup-python@v5
|
||||
# with:
|
||||
# python-version: '3.11'
|
||||
|
||||
# - name: Install jq
|
||||
# uses: dcarbone/install-jq-action@v2.0.1
|
||||
- name: Install jq
|
||||
uses: dcarbone/install-jq-action@v2.0.1
|
||||
|
||||
- name: Update app version based on latest release tag with build number
|
||||
if: inputs.public_provider != 'github'
|
||||
@ -101,17 +95,17 @@ jobs:
|
||||
env:
|
||||
VERSION_TAG: ${{ inputs.new_version }}
|
||||
|
||||
# - name: Get Cer for code signing
|
||||
# run: base64 -d <<< "$CODE_SIGN_P12_BASE64" > /tmp/codesign.p12
|
||||
# shell: bash
|
||||
# env:
|
||||
# CODE_SIGN_P12_BASE64: ${{ secrets.CODE_SIGN_P12_BASE64 }}
|
||||
- name: Get Cer for code signing
|
||||
run: base64 -d <<< "$CODE_SIGN_P12_BASE64" > /tmp/codesign.p12
|
||||
shell: bash
|
||||
env:
|
||||
CODE_SIGN_P12_BASE64: ${{ secrets.CODE_SIGN_P12_BASE64 }}
|
||||
|
||||
# - uses: apple-actions/import-codesign-certs@v2
|
||||
# continue-on-error: true
|
||||
# with:
|
||||
# p12-file-base64: ${{ secrets.CODE_SIGN_P12_BASE64 }}
|
||||
# p12-password: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
|
||||
- uses: apple-actions/import-codesign-certs@v2
|
||||
continue-on-error: true
|
||||
with:
|
||||
p12-file-base64: ${{ secrets.CODE_SIGN_P12_BASE64 }}
|
||||
p12-password: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
|
||||
|
||||
- name: Build and publish app to cloudflare r2 or github artifactory
|
||||
if: inputs.public_provider != 'github'
|
||||
@ -125,9 +119,9 @@ jobs:
|
||||
fi
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
# CSC_LINK: "/tmp/codesign.p12"
|
||||
# CSC_KEY_PASSWORD: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
|
||||
# CSC_IDENTITY_AUTO_DISCOVERY: "true"
|
||||
CSC_LINK: "/tmp/codesign.p12"
|
||||
CSC_KEY_PASSWORD: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
|
||||
CSC_IDENTITY_AUTO_DISCOVERY: "true"
|
||||
APPLE_ID: ${{ secrets.APPLE_ID }}
|
||||
APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
|
||||
APP_PATH: "."
|
||||
@ -143,9 +137,9 @@ jobs:
|
||||
make build-and-publish
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
# CSC_LINK: "/tmp/codesign.p12"
|
||||
# CSC_KEY_PASSWORD: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
|
||||
# CSC_IDENTITY_AUTO_DISCOVERY: "true"
|
||||
CSC_LINK: "/tmp/codesign.p12"
|
||||
CSC_KEY_PASSWORD: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
|
||||
CSC_IDENTITY_AUTO_DISCOVERY: "true"
|
||||
APPLE_ID: ${{ secrets.APPLE_ID }}
|
||||
APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
|
||||
APP_PATH: "."
|
||||
|
||||
@ -159,4 +159,3 @@ jobs:
|
||||
with:
|
||||
name: latest-mac-x64
|
||||
path: ./electron/dist/latest-mac.yml
|
||||
|
||||
|
||||
@ -68,14 +68,19 @@ export function requestInference(
|
||||
let cachedLines = ''
|
||||
for (const line of lines) {
|
||||
try {
|
||||
const toParse = cachedLines + line
|
||||
if (!line.includes('data: [DONE]')) {
|
||||
const data = JSON.parse(toParse.replace('data: ', ''))
|
||||
content += data.choices[0]?.delta?.content ?? ''
|
||||
if (content.startsWith('assistant: ')) {
|
||||
content = content.replace('assistant: ', '')
|
||||
if (transformResponse) {
|
||||
content += transformResponse(line)
|
||||
subscriber.next(content ?? '')
|
||||
} else {
|
||||
const toParse = cachedLines + line
|
||||
if (!line.includes('data: [DONE]')) {
|
||||
const data = JSON.parse(toParse.replace('data: ', ''))
|
||||
content += data.choices[0]?.delta?.content ?? ''
|
||||
if (content.startsWith('assistant: ')) {
|
||||
content = content.replace('assistant: ', '')
|
||||
}
|
||||
if (content !== '') subscriber.next(content)
|
||||
}
|
||||
if (content !== '') subscriber.next(content)
|
||||
}
|
||||
} catch {
|
||||
cachedLines = line
|
||||
|
||||
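The requestInference hunk above reworks the stream parser: when a provider defines transformResponse the raw line is handed to it, otherwise the line is treated as an OpenAI-style `data:` chunk, and a line that fails to parse is cached so it can be re-joined with the next chunk. A minimal standalone sketch of that line-buffering idea (the names here are illustrative, not the extension's actual API):

```typescript
// Hypothetical helper: feed it raw SSE lines, read back the accumulated content.
function createLineBufferParser() {
  let cachedLines = ''
  let content = ''

  return (line: string): string => {
    try {
      const toParse = cachedLines + line
      if (!line.includes('data: [DONE]')) {
        // OpenAI-style chunk: strip the "data: " prefix and read the delta.
        const data = JSON.parse(toParse.replace('data: ', ''))
        content += data.choices[0]?.delta?.content ?? ''
      }
      cachedLines = ''
    } catch {
      // Incomplete JSON: keep the fragment and retry once the rest arrives.
      cachedLines = line
    }
    return content
  }
}

// A chunk split mid-JSON is completed by the following chunk.
const push = createLineBufferParser()
push('data: {"choices":[{"delta":{"content":"Hel')
console.log(push('lo"}}]}')) // "Hello"
```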
@ -9,11 +9,11 @@ export const SUPPORTED_MODEL_FORMAT = '.gguf'
// The URL for the Nitro subprocess
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
// The URL for the Nitro subprocess to load a model
export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`
export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
// The URL for the Nitro subprocess to validate a model
export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`
export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`

// The URL for the Nitro subprocess to kill itself
export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`

export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/llamacpp/chat_completion` // default nitro url
export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url
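With the rename above, the local inference server is reached under `/inferences/server/...` instead of `/inferences/llamacpp/...`. As a rough illustration, a load-model request against the renamed route could look like the sketch below; the request body follows Nitro's documented `loadmodel` fields and is an assumption here, not something taken from this diff:

```typescript
// Illustrative only: POST a load-model request to the renamed cortex-cpp route.
const LOCAL_HOST = '127.0.0.1'
const NITRO_DEFAULT_PORT = 3928
const NITRO_HTTP_LOAD_MODEL_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/loadmodel`

async function loadModel(modelPath: string): Promise<void> {
  const res = await fetch(NITRO_HTTP_LOAD_MODEL_URL, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    // llama_model_path / ctx_len are assumed example fields for this sketch.
    body: JSON.stringify({ llama_model_path: modelPath, ctx_len: 2048 }),
  })
  if (!res.ok) throw new Error(`loadmodel failed with status ${res.status}`)
}
```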
@ -144,12 +144,12 @@ const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSe
|
||||
}
|
||||
|
||||
const spawnNitroProcess = async (): Promise<void> => {
|
||||
log(`[SERVER]::Debug: Spawning Nitro subprocess...`)
|
||||
log(`[SERVER]::Debug: Spawning cortex subprocess...`)
|
||||
|
||||
let binaryFolder = join(
|
||||
getJanExtensionsPath(),
|
||||
'@janhq',
|
||||
'inference-nitro-extension',
|
||||
'inference-cortex-extension',
|
||||
'dist',
|
||||
'bin'
|
||||
)
|
||||
@ -160,7 +160,7 @@ const spawnNitroProcess = async (): Promise<void> => {
|
||||
const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()]
|
||||
// Execute the binary
|
||||
log(
|
||||
`[SERVER]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
|
||||
`[SERVER]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
|
||||
)
|
||||
subprocess = spawn(
|
||||
executableOptions.executablePath,
|
||||
@ -184,12 +184,12 @@ const spawnNitroProcess = async (): Promise<void> => {
|
||||
})
|
||||
|
||||
subprocess.on('close', (code: any) => {
|
||||
log(`[SERVER]::Debug: Nitro exited with code: ${code}`)
|
||||
log(`[SERVER]::Debug: cortex exited with code: ${code}`)
|
||||
subprocess = undefined
|
||||
})
|
||||
|
||||
tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => {
|
||||
log(`[SERVER]::Debug: Nitro is ready`)
|
||||
log(`[SERVER]::Debug: cortex is ready`)
|
||||
})
|
||||
}
|
||||
|
||||
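The spawn logic above boils down to: launch the cortex-cpp binary, log when it exits, and treat the engine as ready once its TCP port is in use. A condensed sketch of that pattern, with the executable path left as a placeholder and the arguments and timings mirroring the code above:

```typescript
import { spawn } from 'child_process'
import tcpPortUsed from 'tcp-port-used'

// Launch a server binary and resolve once its port starts accepting connections.
async function spawnAndWait(executablePath: string, port: number): Promise<void> {
  const child = spawn(executablePath, ['1', '127.0.0.1', port.toString()])
  child.on('close', (code) => console.log(`server exited with code ${code}`))
  // Poll every 300 ms and give up after 30 s - the same timings used above.
  await tcpPortUsed.waitUntilUsed(port, 300, 30000)
  console.log('server is ready')
}
```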
@ -203,13 +203,13 @@ const executableNitroFile = (): NitroExecutableOptions => {
|
||||
let binaryFolder = join(
|
||||
getJanExtensionsPath(),
|
||||
'@janhq',
|
||||
'inference-nitro-extension',
|
||||
'inference-cortex-extension',
|
||||
'dist',
|
||||
'bin'
|
||||
)
|
||||
|
||||
let cudaVisibleDevices = ''
|
||||
let binaryName = 'nitro'
|
||||
let binaryName = 'cortex-cpp'
|
||||
/**
|
||||
* The binary folder is different for each platform.
|
||||
*/
|
||||
@ -228,12 +228,16 @@ const executableNitroFile = (): NitroExecutableOptions => {
|
||||
}
|
||||
cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
|
||||
}
|
||||
binaryName = 'nitro.exe'
|
||||
binaryName = 'cortex-cpp.exe'
|
||||
} else if (process.platform === 'darwin') {
/**
* For macOS: mac-arm64 for Apple Silicon, mac-amd64 for Intel
*/
binaryFolder = join(binaryFolder, 'mac-universal')
if (process.arch === 'arm64') {
binaryFolder = join(binaryFolder, 'mac-arm64')
} else {
binaryFolder = join(binaryFolder, 'mac-amd64')
}
} else {
|
||||
/**
|
||||
* For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
|
||||
@ -300,7 +304,7 @@ const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> =>
|
||||
retryDelay: 500,
|
||||
})
|
||||
.then((res: any) => {
|
||||
log(`[SERVER]::Debug: Load model success with response ${JSON.stringify(res)}`)
|
||||
log(`[SERVER]::Debug: Load model request with response ${JSON.stringify(res)}`)
|
||||
return Promise.resolve(res)
|
||||
})
|
||||
.catch((err: any) => {
|
||||
@ -327,7 +331,7 @@ export const stopModel = async (_modelId: string) => {
|
||||
})
|
||||
}, 5000)
|
||||
const tcpPortUsed = require('tcp-port-used')
|
||||
log(`[SERVER]::Debug: Request to kill Nitro`)
|
||||
log(`[SERVER]::Debug: Request to kill cortex`)
|
||||
|
||||
fetch(NITRO_HTTP_KILL_URL, {
|
||||
method: 'DELETE',
|
||||
|
||||
@ -4,7 +4,7 @@ import { log } from './logger'
|
||||
|
||||
export const getSystemResourceInfo = async (): Promise<SystemResourceInfo> => {
|
||||
const cpu = await physicalCpuCount()
|
||||
log(`[NITRO]::CPU information - ${cpu}`)
|
||||
log(`[CORTEX]::CPU information - ${cpu}`)
|
||||
|
||||
return {
|
||||
numCpuPhysicalCore: cpu,
|
||||
|
||||
@ -19,6 +19,7 @@ export enum NativeRoute {
|
||||
showMainWindow = 'showMainWindow',
|
||||
|
||||
quickAskSizeUpdated = 'quickAskSizeUpdated',
|
||||
ackDeepLink = 'ackDeepLink',
|
||||
}
|
||||
|
||||
/**
|
||||
@ -45,6 +46,8 @@ export enum AppEvent {
|
||||
|
||||
onUserSubmitQuickAsk = 'onUserSubmitQuickAsk',
|
||||
onSelectedText = 'onSelectedText',
|
||||
|
||||
onDeepLink = 'onDeepLink',
|
||||
}
|
||||
|
||||
export enum DownloadRoute {
|
||||
|
||||
@ -151,4 +151,8 @@ export function handleAppIPCs() {
|
||||
async (_event, heightOffset: number): Promise<void> =>
|
||||
windowManager.expandQuickAskWindow(heightOffset)
|
||||
)
|
||||
|
||||
ipcMain.handle(NativeRoute.ackDeepLink, async (_event): Promise<void> => {
|
||||
windowManager.ackDeepLink()
|
||||
})
|
||||
}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import { app, BrowserWindow } from 'electron'
|
||||
|
||||
import { join } from 'path'
|
||||
import { join, resolve } from 'path'
|
||||
/**
|
||||
* Managers
|
||||
**/
|
||||
@ -39,15 +39,44 @@ const quickAskUrl = `${mainUrl}/search`
|
||||
|
||||
const gotTheLock = app.requestSingleInstanceLock()
|
||||
|
||||
if (process.defaultApp) {
|
||||
if (process.argv.length >= 2) {
|
||||
app.setAsDefaultProtocolClient('jan', process.execPath, [
|
||||
resolve(process.argv[1]),
|
||||
])
|
||||
}
|
||||
} else {
|
||||
app.setAsDefaultProtocolClient('jan')
|
||||
}
|
||||
|
||||
const createMainWindow = () => {
|
||||
const startUrl = app.isPackaged ? `file://${mainPath}` : mainUrl
|
||||
windowManager.createMainWindow(preloadPath, startUrl)
|
||||
}
|
||||
|
||||
app
|
||||
.whenReady()
|
||||
.then(() => {
|
||||
if (!gotTheLock) {
|
||||
app.quit()
|
||||
throw new Error('Another instance of the app is already running')
|
||||
} else {
|
||||
app.on(
|
||||
'second-instance',
|
||||
(_event, commandLine, _workingDirectory): void => {
|
||||
if (process.platform === 'win32' || process.platform === 'linux') {
|
||||
// this is for handling deep links on windows and linux
// since those OSes emit second-instance instead of open-url
|
||||
const url = commandLine.pop()
|
||||
if (url) {
|
||||
windowManager.sendMainAppDeepLink(url)
|
||||
}
|
||||
}
|
||||
windowManager.showMainWindow()
|
||||
}
|
||||
)
|
||||
}
|
||||
})
|
||||
.then(setupReactDevTool)
|
||||
.then(setupCore)
|
||||
.then(createUserSpace)
|
||||
.then(migrateExtensions)
|
||||
@ -60,6 +89,7 @@ app
|
||||
.then(registerGlobalShortcuts)
|
||||
.then(() => {
|
||||
if (!app.isPackaged) {
|
||||
setupReactDevTool()
|
||||
windowManager.mainWindow?.webContents.openDevTools()
|
||||
}
|
||||
})
|
||||
@ -75,11 +105,11 @@ app
|
||||
})
|
||||
})
|
||||
|
||||
app.on('second-instance', (_event, _commandLine, _workingDirectory) => {
|
||||
windowManager.showMainWindow()
|
||||
app.on('open-url', (_event, url) => {
|
||||
windowManager.sendMainAppDeepLink(url)
|
||||
})
|
||||
|
||||
app.on('before-quit', function (evt) {
|
||||
app.on('before-quit', function (_event) {
|
||||
trayManager.destroyCurrentTray()
|
||||
})
|
||||
|
||||
@ -104,11 +134,6 @@ function createQuickAskWindow() {
|
||||
windowManager.createQuickAskWindow(preloadPath, startUrl)
|
||||
}
|
||||
|
||||
function createMainWindow() {
|
||||
const startUrl = app.isPackaged ? `file://${mainPath}` : mainUrl
|
||||
windowManager.createMainWindow(preloadPath, startUrl)
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles various IPC messages from the renderer process.
|
||||
*/
|
||||
|
||||
@ -14,9 +14,9 @@ class WindowManager {
|
||||
private _quickAskWindowVisible = false
|
||||
private _mainWindowVisible = false
|
||||
|
||||
private deeplink: string | undefined
|
||||
/**
|
||||
* Creates a new window instance.
|
||||
* @param {Electron.BrowserWindowConstructorOptions} options - The options to create the window with.
|
||||
* @returns The created window instance.
|
||||
*/
|
||||
createMainWindow(preloadPath: string, startUrl: string) {
|
||||
@ -29,6 +29,17 @@ class WindowManager {
|
||||
},
|
||||
})
|
||||
|
||||
if (process.platform === 'win32' || process.platform === 'linux') {
|
||||
/// This is a workaround for deep links on Windows and Linux.
/// The second-instance event is not fired when the app is not yet open,
/// so the app would otherwise not receive the deep link.
|
||||
const commandLine = process.argv.slice(1)
|
||||
if (commandLine.length > 0) {
|
||||
const url = commandLine[0]
|
||||
this.sendMainAppDeepLink(url)
|
||||
}
|
||||
}
|
||||
|
||||
/* Load frontend app to the window */
|
||||
this.mainWindow.loadURL(startUrl)
|
||||
|
||||
@ -123,6 +134,22 @@ class WindowManager {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to send the deep link to the main app.
|
||||
*/
|
||||
sendMainAppDeepLink(url: string): void {
|
||||
this.deeplink = url
|
||||
const interval = setInterval(() => {
|
||||
if (!this.deeplink) clearInterval(interval)
|
||||
const mainWindow = this.mainWindow
|
||||
if (mainWindow) {
|
||||
mainWindow.webContents.send(AppEvent.onDeepLink, this.deeplink)
|
||||
if (mainWindow.isMinimized()) mainWindow.restore()
|
||||
mainWindow.focus()
|
||||
}
|
||||
}, 500)
|
||||
}
|
||||
|
||||
cleanUp(): void {
|
||||
if (!this.mainWindow?.isDestroyed()) {
|
||||
this.mainWindow?.close()
|
||||
@ -137,6 +164,13 @@ class WindowManager {
|
||||
this._quickAskWindowVisible = false
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Acknowledges that the window has received the deep link, so it can be cleared.
|
||||
*/
|
||||
ackDeepLink() {
|
||||
this.deeplink = undefined
|
||||
}
|
||||
}
|
||||
|
||||
export const windowManager = new WindowManager()
|
||||
|
||||
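Taken together, the deep-link changes form a small handshake: the main process stores the URL, re-sends AppEvent.onDeepLink to the renderer every 500 ms, and only stops once the renderer calls the NativeRoute.ackDeepLink IPC route, which clears the stored URL. A sketch of the renderer side of that handshake, assuming direct access to ipcRenderer (the real UI would go through its preload bridge):

```typescript
import { ipcRenderer } from 'electron'

// Listen for the deep link the main process keeps re-sending...
ipcRenderer.on('onDeepLink', async (_event, url: string) => {
  console.log('received deep link:', url)
  // ...and acknowledge it so the main process stops re-sending and clears it.
  await ipcRenderer.invoke('ackDeepLink')
})
```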
@ -61,6 +61,14 @@
|
||||
"include": "scripts/uninstaller.nsh",
|
||||
"deleteAppDataOnUninstall": true
|
||||
},
|
||||
"protocols": [
|
||||
{
|
||||
"name": "Jan",
|
||||
"schemes": [
|
||||
"jan"
|
||||
]
|
||||
}
|
||||
],
|
||||
"artifactName": "jan-${os}-${arch}-${version}.${ext}"
|
||||
},
|
||||
"scripts": {
|
||||
@ -96,7 +104,7 @@
|
||||
"request": "^2.88.2",
|
||||
"request-progress": "^3.0.0",
|
||||
"ulidx": "^2.3.0",
|
||||
"@nut-tree/nut-js": "^4.0.0"
|
||||
"@kirillvakalov/nut-tree__nut-js": "4.2.1-2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@electron/notarize": "^2.1.0",
|
||||
|
||||
@ -1,17 +1,13 @@
|
||||
import { app } from 'electron'
|
||||
|
||||
export const setupReactDevTool = async () => {
|
||||
if (!app.isPackaged) {
|
||||
// Which means you're running from source code
|
||||
const { default: installExtension, REACT_DEVELOPER_TOOLS } = await import(
|
||||
'electron-devtools-installer'
|
||||
) // Don't use import on top level, since the installer package is dev-only
|
||||
try {
|
||||
const name = await installExtension(REACT_DEVELOPER_TOOLS)
|
||||
console.debug(`Added Extension: ${name}`)
|
||||
} catch (err) {
|
||||
console.error('An error occurred while installing devtools:', err)
|
||||
// Only log the error and don't throw it because it's not critical
|
||||
}
|
||||
// Which means you're running from source code
|
||||
const { default: installExtension, REACT_DEVELOPER_TOOLS } = await import(
|
||||
'electron-devtools-installer'
|
||||
) // Don't use import on top level, since the installer package is dev-only
|
||||
try {
|
||||
const name = await installExtension(REACT_DEVELOPER_TOOLS)
|
||||
console.debug(`Added Extension: ${name}`)
|
||||
} catch (err) {
|
||||
console.error('An error occurred while installing devtools:', err)
|
||||
// Only log the error and don't throw it because it's not critical
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import { clipboard, globalShortcut } from 'electron'
|
||||
import { keyboard, Key } from '@nut-tree/nut-js'
|
||||
import { keyboard, Key } from "@kirillvakalov/nut-tree__nut-js"
|
||||
|
||||
/**
|
||||
* Gets selected text by synthesizing the keyboard shortcut
|
||||
|
||||
@ -10,11 +10,12 @@ export function toolRetrievalUpdateTextSplitter(
|
||||
}
|
||||
export async function toolRetrievalIngestNewDocument(
|
||||
file: string,
|
||||
model: string,
|
||||
engine: string
|
||||
) {
|
||||
const filePath = path.join(getJanDataFolderPath(), normalizeFilePath(file))
|
||||
const threadPath = path.dirname(filePath.replace('files', ''))
|
||||
retrieval.updateEmbeddingEngine(engine)
|
||||
retrieval.updateEmbeddingEngine(model, engine)
|
||||
return retrieval
|
||||
.ingestAgentKnowledge(filePath, `${threadPath}/memory`)
|
||||
.catch((err) => {
|
||||
|
||||
@ -28,14 +28,14 @@ export class Retrieval {
|
||||
})
|
||||
}
|
||||
|
||||
public updateEmbeddingEngine(engine: string): void {
|
||||
public updateEmbeddingEngine(model: string, engine: string): void {
|
||||
// Engine settings are not compatible with the current embedding model params
|
||||
// Switch case manually for now
|
||||
if (engine === 'nitro') {
|
||||
this.embeddingModel = new OpenAIEmbeddings(
|
||||
{ openAIApiKey: 'nitro-embedding' },
|
||||
{ openAIApiKey: 'nitro-embedding', model },
|
||||
// TODO: Raw settings
|
||||
{ basePath: 'http://127.0.0.1:3928/v1' }
|
||||
{ basePath: 'http://127.0.0.1:3928/v1' },
|
||||
)
|
||||
} else {
|
||||
// Fallback to OpenAI Settings
|
||||
|
||||
@ -36,6 +36,7 @@ export class RetrievalTool extends InferenceTool {
|
||||
NODE,
|
||||
'toolRetrievalIngestNewDocument',
|
||||
docFile,
|
||||
data.model?.id,
|
||||
data.model?.engine
|
||||
)
|
||||
} else {
|
||||
|
||||
79  extensions/inference-anthropic-extension/README.md  Normal file
@ -0,0 +1,79 @@
|
||||
# Anthropic Engine Extension
|
||||
|
||||
Created using Jan extension example
|
||||
|
||||
# Create a Jan Extension using Typescript
|
||||
|
||||
Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
|
||||
|
||||
## Create Your Own Extension
|
||||
|
||||
To create your own extension, you can use this repository as a template! Just follow the instructions below:
|
||||
|
||||
1. Click the Use this template button at the top of the repository
|
||||
2. Select Create a new repository
|
||||
3. Select an owner and name for your new repository
|
||||
4. Click Create repository
|
||||
5. Clone your new repository
|
||||
|
||||
## Initial Setup
|
||||
|
||||
After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
|
||||
|
||||
> [!NOTE]
|
||||
>
|
||||
> You'll need to have a reasonably modern version of
|
||||
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
|
||||
> [`nodenv`](https://github.com/nodenv/nodenv) or
|
||||
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
|
||||
> root of your repository to install the version specified in
|
||||
> [`package.json`](./package.json). Otherwise, 20.x or later should work!
|
||||
|
||||
1. :hammer_and_wrench: Install the dependencies
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
1. :building_construction: Package the TypeScript for distribution
|
||||
|
||||
```bash
|
||||
npm run bundle
|
||||
```
|
||||
|
||||
1. :white_check_mark: Check your artifact
|
||||
|
||||
There will be a tgz file in your extension directory now
|
||||
|
||||
## Update the Extension Metadata
|
||||
|
||||
The [`package.json`](package.json) file defines metadata about your extension, such as
|
||||
extension name, main entry, description and version.
|
||||
|
||||
When you copy this repository, update `package.json` with the name and description of your extension.
|
||||
|
||||
## Update the Extension Code
|
||||
|
||||
The [`src/`](./src/) directory is the heart of your extension! This contains the
|
||||
source code that will be run when your extension functions are invoked. You can replace the
|
||||
contents of this directory with your own code.
|
||||
|
||||
There are a few things to keep in mind when writing your extension code:
|
||||
|
||||
- Most Jan Extension functions are processed asynchronously.
|
||||
In `index.ts`, you will see that the extension function will return a `Promise<any>`.
|
||||
|
||||
```typescript
|
||||
import { events, MessageEvent, MessageRequest } from '@janhq/core'
|
||||
|
||||
function onStart(): Promise<any> {
|
||||
return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
|
||||
this.inference(data)
|
||||
)
|
||||
}
|
||||
```
|
||||
|
||||
For more information about the Jan Extension Core module, see the
|
||||
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).
|
||||
|
||||
So, what are you waiting for? Go ahead and start customizing your extension!
|
||||
43  extensions/inference-anthropic-extension/package.json  Normal file
@ -0,0 +1,43 @@
|
||||
{
|
||||
"name": "@janhq/inference-anthropic-extension",
|
||||
"productName": "Anthropic Inference Engine",
|
||||
"version": "1.0.0",
|
||||
"description": "This extension enables Anthropic chat completion API calls",
|
||||
"main": "dist/index.js",
|
||||
"module": "dist/module.js",
|
||||
"engine": "anthropic",
|
||||
"author": "Jan <service@jan.ai>",
|
||||
"license": "AGPL-3.0",
|
||||
"scripts": {
|
||||
"build": "tsc -b . && webpack --config webpack.config.js",
|
||||
"build:publish": "rimraf *.tgz --glob && yarn build && npm pack && cpx *.tgz ../../pre-install",
|
||||
"sync:core": "cd ../.. && yarn build:core && cd extensions && rm yarn.lock && cd inference-anthropic-extension && yarn && yarn build:publish"
|
||||
},
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
"./main": "./dist/module.js"
|
||||
},
|
||||
"devDependencies": {
|
||||
"cpx": "^1.5.0",
|
||||
"rimraf": "^3.0.2",
|
||||
"webpack": "^5.88.2",
|
||||
"webpack-cli": "^5.1.4",
|
||||
"ts-loader": "^9.5.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@janhq/core": "file:../../core",
|
||||
"fetch-retry": "^5.0.6",
|
||||
"ulidx": "^2.3.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
},
|
||||
"files": [
|
||||
"dist/*",
|
||||
"package.json",
|
||||
"README.md"
|
||||
],
|
||||
"bundleDependencies": [
|
||||
"fetch-retry"
|
||||
]
|
||||
}
|
||||
@ -0,0 +1,83 @@
|
||||
[
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"url": "https://www.anthropic.com/"
|
||||
}
|
||||
],
|
||||
"id": "claude-3-opus-20240229",
|
||||
"object": "model",
|
||||
"name": "Claude 3 Opus",
|
||||
"version": "1.0",
|
||||
"description": "Claude 3 Opus is a powerful model suitables for highly complex task.",
|
||||
"format": "api",
|
||||
"settings": {},
|
||||
"parameters": {
|
||||
"max_tokens": 4096,
|
||||
"temperature": 0.7,
|
||||
"stream": false
|
||||
},
|
||||
"metadata": {
|
||||
"author": "Anthropic",
|
||||
"tags": [
|
||||
"General",
|
||||
"Big Context Length"
|
||||
]
|
||||
},
|
||||
"engine": "anthropic"
|
||||
},
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"url": "https://www.anthropic.com/"
|
||||
}
|
||||
],
|
||||
"id": "claude-3-sonnet-20240229",
|
||||
"object": "model",
|
||||
"name": "Claude 3 Sonnet",
|
||||
"version": "1.0",
|
||||
"description": "Claude 3 Sonnet is an ideal model balance of intelligence and speed for enterprise workloads.",
|
||||
"format": "api",
|
||||
"settings": {},
|
||||
"parameters": {
|
||||
"max_tokens": 4096,
|
||||
"temperature": 0.7,
|
||||
"stream": false
|
||||
},
|
||||
"metadata": {
|
||||
"author": "Anthropic",
|
||||
"tags": [
|
||||
"General",
|
||||
"Big Context Length"
|
||||
]
|
||||
},
|
||||
"engine": "anthropic"
|
||||
},
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"url": "https://www.anthropic.com/"
|
||||
}
|
||||
],
|
||||
"id": "claude-3-haiku-20240307",
|
||||
"object": "model",
|
||||
"name": "Claude 3 Haiku",
|
||||
"version": "1.0",
|
||||
"description": "Claude 3 Haiku is the fastest model provides near-instant responsiveness.",
|
||||
"format": "api",
|
||||
"settings": {},
|
||||
"parameters": {
|
||||
"max_tokens": 4096,
|
||||
"temperature": 0.7,
|
||||
"stream": false
|
||||
},
|
||||
"metadata": {
|
||||
"author": "Anthropic",
|
||||
"tags": [
|
||||
"General",
|
||||
"Big Context Length"
|
||||
]
|
||||
},
|
||||
"engine": "anthropic"
|
||||
}
|
||||
]
|
||||
@ -0,0 +1,23 @@
|
||||
[
|
||||
{
|
||||
"key": "chat-completions-endpoint",
|
||||
"title": "Chat Completions Endpoint",
|
||||
"description": "The endpoint to use for chat completions. See the [Anthropic API documentation](https://docs.anthropic.com/claude/docs/intro-to-claude) for more information.",
|
||||
"controllerType": "input",
|
||||
"controllerProps": {
|
||||
"placeholder": "https://api.anthropic.com/v1/messages",
|
||||
"value": "https://api.anthropic.com/v1/messages"
|
||||
}
|
||||
},
|
||||
{
|
||||
"key": "anthropic-api-key",
|
||||
"title": "API Key",
|
||||
"description": "The Anthropic API uses API keys for authentication. Visit your [API Keys](https://console.anthropic.com/settings/keys) page to retrieve the API key you'll use in your requests.",
|
||||
"controllerType": "input",
|
||||
"controllerProps": {
|
||||
"placeholder": "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
|
||||
"value": "",
|
||||
"type": "password"
|
||||
}
|
||||
}
|
||||
]
|
||||
124  extensions/inference-anthropic-extension/src/index.ts  Normal file
@ -0,0 +1,124 @@
|
||||
/**
|
||||
* @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
|
||||
* The class provides methods for initializing and stopping a model, and for making inference requests.
|
||||
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
|
||||
* @version 1.0.0
|
||||
* @module inference-anthropic-extension/src/index
|
||||
*/
|
||||
|
||||
import { RemoteOAIEngine } from '@janhq/core'
|
||||
import { PayloadType } from '@janhq/core'
|
||||
import { ChatCompletionRole } from '@janhq/core'
|
||||
|
||||
declare const SETTINGS: Array<any>
|
||||
declare const MODELS: Array<any>
|
||||
|
||||
enum Settings {
|
||||
apiKey = 'anthropic-api-key',
|
||||
chatCompletionsEndPoint = 'chat-completions-endpoint',
|
||||
}
|
||||
|
||||
type AnthropicPayloadType = {
|
||||
model?: string
|
||||
max_tokens?: number
|
||||
messages?: Array<{ role: string; content: string }>
|
||||
}
|
||||
|
||||
/**
|
||||
* A class that implements the InferenceExtension interface from the @janhq/core package.
|
||||
* The class provides methods for initializing and stopping a model, and for making inference requests.
|
||||
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
|
||||
*/
|
||||
export default class JanInferenceAnthropicExtension extends RemoteOAIEngine {
|
||||
inferenceUrl: string = ''
|
||||
provider: string = 'anthropic'
|
||||
maxTokens: number = 4096
|
||||
|
||||
override async onLoad(): Promise<void> {
|
||||
super.onLoad()
|
||||
|
||||
// Register Settings
|
||||
this.registerSettings(SETTINGS)
|
||||
this.registerModels(MODELS)
|
||||
|
||||
this.apiKey = await this.getSetting<string>(Settings.apiKey, '')
|
||||
this.inferenceUrl = await this.getSetting<string>(
|
||||
Settings.chatCompletionsEndPoint,
|
||||
''
|
||||
)
|
||||
|
||||
if (this.inferenceUrl.length === 0) {
|
||||
SETTINGS.forEach((setting) => {
|
||||
if (setting.key === Settings.chatCompletionsEndPoint) {
|
||||
this.inferenceUrl = setting.controllerProps.value as string
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Override the headers method to include the x-api-key in the request headers
|
||||
override async headers(): Promise<HeadersInit> {
|
||||
return {
|
||||
'Content-Type': 'application/json',
|
||||
'x-api-key': this.apiKey,
|
||||
'anthropic-version': '2023-06-01',
|
||||
}
|
||||
}
|
||||
|
||||
onSettingUpdate<T>(key: string, value: T): void {
|
||||
if (key === Settings.apiKey) {
|
||||
this.apiKey = value as string
|
||||
} else if (key === Settings.chatCompletionsEndPoint) {
|
||||
if (typeof value !== 'string') return
|
||||
|
||||
if (value.trim().length === 0) {
|
||||
SETTINGS.forEach((setting) => {
|
||||
if (setting.key === Settings.chatCompletionsEndPoint) {
|
||||
this.inferenceUrl = setting.controllerProps.value as string
|
||||
}
|
||||
})
|
||||
} else {
|
||||
this.inferenceUrl = value
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Override the transformPayload method to convert the payload to the required format
|
||||
transformPayload = (payload: PayloadType): AnthropicPayloadType => {
|
||||
if (!payload.messages || payload.messages.length === 0) {
|
||||
return { max_tokens: this.maxTokens, messages: [], model: payload.model }
|
||||
}
|
||||
|
||||
const convertedData: AnthropicPayloadType = {
|
||||
max_tokens: this.maxTokens,
|
||||
messages: [],
|
||||
model: payload.model,
|
||||
}
|
||||
|
||||
payload.messages.forEach((item, index) => {
|
||||
if (item.role === ChatCompletionRole.User) {
|
||||
convertedData.messages.push({
|
||||
role: 'user',
|
||||
content: item.content as string,
|
||||
})
|
||||
} else if (item.role === ChatCompletionRole.Assistant) {
|
||||
convertedData.messages.push({
|
||||
role: 'assistant',
|
||||
content: item.content as string,
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
return convertedData
|
||||
}
|
||||
|
||||
// Override the transformResponse method to convert the response to the required format
|
||||
transformResponse = (data: any): string => {
|
||||
if (data.content && data.content.length > 0 && data.content[0].text) {
|
||||
return data.content[0].text
|
||||
} else {
|
||||
console.error('Invalid response format:', data)
|
||||
return ''
|
||||
}
|
||||
}
|
||||
}
|
||||
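transformPayload above reduces an OpenAI-style message list to the three fields sent to Anthropic's Messages API: model, max_tokens, and user/assistant messages (anything else, such as a system message, is dropped). A standalone illustration of that mapping with sample data, not using the extension's own types:

```typescript
type ChatMessage = { role: string; content: string }
type AnthropicPayload = {
  model?: string
  max_tokens?: number
  messages: Array<{ role: string; content: string }>
}

// Keep only user/assistant turns and cap max_tokens, as transformPayload does.
function toAnthropicPayload(model: string, messages: ChatMessage[]): AnthropicPayload {
  return {
    model,
    max_tokens: 4096,
    messages: messages.filter((m) => m.role === 'user' || m.role === 'assistant'),
  }
}

console.log(
  toAnthropicPayload('claude-3-haiku-20240307', [
    { role: 'system', content: 'You are terse.' }, // dropped by the mapping
    { role: 'user', content: 'Hello!' },
  ])
)
```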
14  extensions/inference-anthropic-extension/tsconfig.json  Normal file
@ -0,0 +1,14 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "es2016",
|
||||
"module": "ES6",
|
||||
"moduleResolution": "node",
|
||||
"outDir": "./dist",
|
||||
"esModuleInterop": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"strict": false,
|
||||
"skipLibCheck": true,
|
||||
"rootDir": "./src"
|
||||
},
|
||||
"include": ["./src"]
|
||||
}
|
||||
37  extensions/inference-anthropic-extension/webpack.config.js  Normal file
@ -0,0 +1,37 @@
|
||||
const webpack = require('webpack')
|
||||
const packageJson = require('./package.json')
|
||||
const settingJson = require('./resources/settings.json')
|
||||
const modelsJson = require('./resources/models.json')
|
||||
|
||||
module.exports = {
|
||||
experiments: { outputModule: true },
|
||||
entry: './src/index.ts', // Adjust the entry point to match your project's main file
|
||||
mode: 'production',
|
||||
module: {
|
||||
rules: [
|
||||
{
|
||||
test: /\.tsx?$/,
|
||||
use: 'ts-loader',
|
||||
exclude: /node_modules/,
|
||||
},
|
||||
],
|
||||
},
|
||||
plugins: [
|
||||
new webpack.DefinePlugin({
|
||||
MODELS: JSON.stringify(modelsJson),
|
||||
SETTINGS: JSON.stringify(settingJson),
|
||||
ENGINE: JSON.stringify(packageJson.engine),
|
||||
}),
|
||||
],
|
||||
output: {
|
||||
filename: 'index.js', // Adjust the output file name as needed
|
||||
library: { type: 'module' }, // Specify ESM output format
|
||||
},
|
||||
resolve: {
|
||||
extensions: ['.ts', '.js'],
|
||||
},
|
||||
optimization: {
|
||||
minimize: false,
|
||||
},
|
||||
// Add loaders and other configuration as needed for your project
|
||||
}
|
||||
@ -1,4 +1,4 @@
|
||||
[
|
||||
[
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
@ -19,7 +19,37 @@
|
||||
},
|
||||
"metadata": {
|
||||
"author": "Cohere",
|
||||
"tags": ["General", "Big Context Length"]
|
||||
"tags": [
|
||||
"General",
|
||||
"Big Context Length"
|
||||
]
|
||||
},
|
||||
"engine": "cohere"
|
||||
},
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"url": "https://cohere.com"
|
||||
}
|
||||
],
|
||||
"id": "command-r",
|
||||
"object": "model",
|
||||
"name": "Command R",
|
||||
"version": "1.0",
|
||||
"description": "Command R is an instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents.",
|
||||
"format": "api",
|
||||
"settings": {},
|
||||
"parameters": {
|
||||
"max_tokens": 128000,
|
||||
"temperature": 0.7,
|
||||
"stream": false
|
||||
},
|
||||
"metadata": {
|
||||
"author": "Cohere",
|
||||
"tags": [
|
||||
"General",
|
||||
"Big Context Length"
|
||||
]
|
||||
},
|
||||
"engine": "cohere"
|
||||
}
|
||||
|
||||
@ -12,7 +12,7 @@
|
||||
{
|
||||
"key": "cohere-api-key",
|
||||
"title": "API Key",
|
||||
"description": "The Cohere API uses API keys for authentication. Visit your [API Keys](https://platform.openai.com/account/api-keys) page to retrieve the API key you'll use in your requests.",
|
||||
"description": "The Cohere API uses API keys for authentication. Visit your [API Keys](https://dashboard.cohere.com/api-keys) page to retrieve the API key you'll use in your requests.",
|
||||
"controllerType": "input",
|
||||
"controllerProps": {
|
||||
"placeholder": "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
* The class provides methods for initializing and stopping a model, and for making inference requests.
|
||||
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
|
||||
* @version 1.0.0
|
||||
* @module inference-openai-extension/src/index
|
||||
* @module inference-cohere-extension/src/index
|
||||
*/
|
||||
|
||||
import { RemoteOAIEngine } from '@janhq/core'
|
||||
@ -26,8 +26,8 @@ enum RoleType {
|
||||
|
||||
type CoherePayloadType = {
|
||||
chat_history?: Array<{ role: RoleType; message: string }>
|
||||
message?: string,
|
||||
preamble?: string,
|
||||
message?: string
|
||||
preamble?: string
|
||||
}
|
||||
|
||||
/**
|
||||
@ -82,18 +82,24 @@ export default class JanInferenceCohereExtension extends RemoteOAIEngine {
|
||||
if (payload.messages.length === 0) {
|
||||
return {}
|
||||
}
|
||||
const convertedData:CoherePayloadType = {
|
||||
|
||||
const { messages, ...params } = payload
|
||||
const convertedData: CoherePayloadType = {
|
||||
...params,
|
||||
chat_history: [],
|
||||
message: '',
|
||||
}
|
||||
payload.messages.forEach((item, index) => {
|
||||
messages.forEach((item, index) => {
|
||||
// Assign the message of the last item to the `message` property
|
||||
if (index === payload.messages.length - 1) {
|
||||
if (index === messages.length - 1) {
|
||||
convertedData.message = item.content as string
|
||||
return
|
||||
}
|
||||
if (item.role === ChatCompletionRole.User) {
|
||||
convertedData.chat_history.push({ role: RoleType.user, message: item.content as string})
|
||||
convertedData.chat_history.push({
|
||||
role: RoleType.user,
|
||||
message: item.content as string,
|
||||
})
|
||||
} else if (item.role === ChatCompletionRole.Assistant) {
|
||||
convertedData.chat_history.push({
|
||||
role: RoleType.chatbot,
|
||||
@ -106,5 +112,7 @@ export default class JanInferenceCohereExtension extends RemoteOAIEngine {
|
||||
return convertedData
|
||||
}
|
||||
|
||||
transformResponse = (data: any) => data.text
|
||||
transformResponse = (data: any) => {
|
||||
return typeof data === 'object' ? data.text : JSON.parse(data).text ?? ''
|
||||
}
|
||||
}
|
||||
|
||||
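The refactored transformPayload above spreads the remaining request parameters into the payload, sends the final turn as `message`, and folds every earlier turn into `chat_history` with Cohere's USER/CHATBOT roles. A standalone illustration of the history/message split with sample data (simplified types, without the parameter spread):

```typescript
type Turn = { role: 'user' | 'assistant'; content: string }

// Last turn becomes `message`; earlier turns become Cohere-style chat_history.
function toCoherePayload(turns: Turn[]) {
  const chat_history: Array<{ role: 'USER' | 'CHATBOT'; message: string }> = []
  let message = ''
  turns.forEach((turn, index) => {
    if (index === turns.length - 1) {
      message = turn.content
      return
    }
    chat_history.push({
      role: turn.role === 'user' ? 'USER' : 'CHATBOT',
      message: turn.content,
    })
  })
  return { chat_history, message }
}

console.log(
  toCoherePayload([
    { role: 'user', content: 'Hi there' },
    { role: 'assistant', content: 'Hello! How can I help?' },
    { role: 'user', content: 'Summarize RAG in one line.' },
  ])
)
```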
@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@janhq/inference-groq-extension",
|
||||
"productName": "Groq Inference Engine",
|
||||
"version": "1.0.0",
|
||||
"version": "1.0.1",
|
||||
"description": "This extension enables fast Groq chat completion API calls",
|
||||
"main": "dist/index.js",
|
||||
"module": "dist/module.js",
|
||||
|
||||
@ -8,22 +8,25 @@
|
||||
"id": "llama3-70b-8192",
|
||||
"object": "model",
|
||||
"name": "Groq Llama 3 70b",
|
||||
"version": "1.0",
|
||||
"version": "1.1",
|
||||
"description": "Groq Llama 3 70b with supercharged speed!",
|
||||
"format": "api",
|
||||
"settings": {
|
||||
"text_model": false
|
||||
},
|
||||
"settings": {},
|
||||
"parameters": {
|
||||
"max_tokens": 8192,
|
||||
"temperature": 0.7,
|
||||
"top_p": 1,
|
||||
"stop": null,
|
||||
"stream": true
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
"metadata": {
|
||||
"author": "Meta",
|
||||
"tags": ["General", "Big Context Length"]
|
||||
"tags": [
|
||||
"General",
|
||||
"Big Context Length"
|
||||
]
|
||||
},
|
||||
"engine": "groq"
|
||||
},
|
||||
@ -36,22 +39,25 @@
|
||||
"id": "llama3-8b-8192",
|
||||
"object": "model",
|
||||
"name": "Groq Llama 3 8b",
|
||||
"version": "1.0",
|
||||
"version": "1.1",
|
||||
"description": "Groq Llama 3 8b with supercharged speed!",
|
||||
"format": "api",
|
||||
"settings": {
|
||||
"text_model": false
|
||||
},
|
||||
"settings": {},
|
||||
"parameters": {
|
||||
"max_tokens": 8192,
|
||||
"temperature": 0.7,
|
||||
"top_p": 1,
|
||||
"stop": null,
|
||||
"stream": true
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
"metadata": {
|
||||
"author": "Meta",
|
||||
"tags": ["General", "Big Context Length"]
|
||||
"tags": [
|
||||
"General",
|
||||
"Big Context Length"
|
||||
]
|
||||
},
|
||||
"engine": "groq"
|
||||
},
|
||||
@ -64,50 +70,24 @@
|
||||
"id": "gemma-7b-it",
|
||||
"object": "model",
|
||||
"name": "Groq Gemma 7b Instruct",
|
||||
"version": "1.0",
|
||||
"version": "1.1",
|
||||
"description": "Groq Gemma 7b Instruct with supercharged speed!",
|
||||
"format": "api",
|
||||
"settings": {
|
||||
"text_model": false
|
||||
},
|
||||
"settings": {},
|
||||
"parameters": {
|
||||
"max_tokens": 4096,
|
||||
"max_tokens": 8192,
|
||||
"temperature": 0.7,
|
||||
"top_p": 1,
|
||||
"stop": null,
|
||||
"stream": true
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
"metadata": {
|
||||
"author": "Google",
|
||||
"tags": ["General"]
|
||||
},
|
||||
"engine": "groq"
|
||||
},
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"url": "https://groq.com"
|
||||
}
|
||||
],
|
||||
"id": "llama2-70b-4096",
|
||||
"object": "model",
|
||||
"name": "Groq Llama 2 70b",
|
||||
"version": "1.0",
|
||||
"description": "Groq Llama 2 70b with supercharged speed!",
|
||||
"format": "api",
|
||||
"settings": {
|
||||
"text_model": false
|
||||
},
|
||||
"parameters": {
|
||||
"max_tokens": 4096,
|
||||
"temperature": 0.7,
|
||||
"top_p": 1,
|
||||
"stop": null,
|
||||
"stream": true
|
||||
},
|
||||
"metadata": {
|
||||
"author": "Meta",
|
||||
"tags": ["General", "Big Context Length"]
|
||||
"tags": [
|
||||
"General"
|
||||
]
|
||||
},
|
||||
"engine": "groq"
|
||||
},
|
||||
@ -120,22 +100,25 @@
|
||||
"id": "mixtral-8x7b-32768",
|
||||
"object": "model",
|
||||
"name": "Groq Mixtral 8x7b Instruct",
|
||||
"version": "1.0",
|
||||
"version": "1.1",
|
||||
"description": "Groq Mixtral 8x7b Instruct is Mixtral with supercharged speed!",
|
||||
"format": "api",
|
||||
"settings": {
|
||||
"text_model": false
|
||||
},
|
||||
"settings": {},
|
||||
"parameters": {
|
||||
"max_tokens": 4096,
|
||||
"max_tokens": 32768,
|
||||
"temperature": 0.7,
|
||||
"top_p": 1,
|
||||
"stop": null,
|
||||
"stream": true
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
"metadata": {
|
||||
"author": "Mistral",
|
||||
"tags": ["General", "Big Context Length"]
|
||||
"tags": [
|
||||
"General",
|
||||
"Big Context Length"
|
||||
]
|
||||
},
|
||||
"engine": "groq"
|
||||
}
|
||||
|
||||
79  extensions/inference-martian-extension/README.md  Normal file
@ -0,0 +1,79 @@
|
||||
# Martian Engine Extension
|
||||
|
||||
Created using Jan extension example
|
||||
|
||||
# Create a Jan Extension using Typescript
|
||||
|
||||
Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
|
||||
|
||||
## Create Your Own Extension
|
||||
|
||||
To create your own extension, you can use this repository as a template! Just follow the instructions below:
|
||||
|
||||
1. Click the Use this template button at the top of the repository
|
||||
2. Select Create a new repository
|
||||
3. Select an owner and name for your new repository
|
||||
4. Click Create repository
|
||||
5. Clone your new repository
|
||||
|
||||
## Initial Setup
|
||||
|
||||
After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
|
||||
|
||||
> [!NOTE]
|
||||
>
|
||||
> You'll need to have a reasonably modern version of
|
||||
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
|
||||
> [`nodenv`](https://github.com/nodenv/nodenv) or
|
||||
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
|
||||
> root of your repository to install the version specified in
|
||||
> [`package.json`](./package.json). Otherwise, 20.x or later should work!
|
||||
|
||||
1. :hammer_and_wrench: Install the dependencies
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
1. :building_construction: Package the TypeScript for distribution
|
||||
|
||||
```bash
|
||||
npm run bundle
|
||||
```
|
||||
|
||||
1. :white_check_mark: Check your artifact
|
||||
|
||||
There will be a tgz file in your extension directory now
|
||||
|
||||
## Update the Extension Metadata
|
||||
|
||||
The [`package.json`](package.json) file defines metadata about your extension, such as
|
||||
extension name, main entry, description and version.
|
||||
|
||||
When you copy this repository, update `package.json` with the name and description of your extension.
|
||||
|
||||
## Update the Extension Code
|
||||
|
||||
The [`src/`](./src/) directory is the heart of your extension! This contains the
|
||||
source code that will be run when your extension functions are invoked. You can replace the
|
||||
contents of this directory with your own code.
|
||||
|
||||
There are a few things to keep in mind when writing your extension code:
|
||||
|
||||
- Most Jan Extension functions are processed asynchronously.
|
||||
In `index.ts`, you will see that the extension function will return a `Promise<any>`.
|
||||
|
||||
```typescript
|
||||
import { events, MessageEvent, MessageRequest } from '@janhq/core'
|
||||
|
||||
function onStart(): Promise<any> {
|
||||
return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
|
||||
this.inference(data)
|
||||
)
|
||||
}
|
||||
```
|
||||
|
||||
For more information about the Jan Extension Core module, see the
|
||||
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).
|
||||
|
||||
So, what are you waiting for? Go ahead and start customizing your extension!
|
||||
42  extensions/inference-martian-extension/package.json  Normal file
@ -0,0 +1,42 @@
|
||||
{
|
||||
"name": "@janhq/inference-martian-extension",
|
||||
"productName": "Martian Inference Engine",
|
||||
"version": "1.0.1",
|
||||
"description": "This extension enables Martian chat completion API calls",
|
||||
"main": "dist/index.js",
|
||||
"module": "dist/module.js",
|
||||
"engine": "martian",
|
||||
"author": "Jan <service@jan.ai>",
|
||||
"license": "AGPL-3.0",
|
||||
"scripts": {
|
||||
"build": "tsc -b . && webpack --config webpack.config.js",
|
||||
"build:publish": "rimraf *.tgz --glob && yarn build && npm pack && cpx *.tgz ../../pre-install"
|
||||
},
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
"./main": "./dist/module.js"
|
||||
},
|
||||
"devDependencies": {
|
||||
"cpx": "^1.5.0",
|
||||
"rimraf": "^3.0.2",
|
||||
"webpack": "^5.88.2",
|
||||
"webpack-cli": "^5.1.4",
|
||||
"ts-loader": "^9.5.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@janhq/core": "file:../../core",
|
||||
"fetch-retry": "^5.0.6",
|
||||
"ulidx": "^2.3.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
},
|
||||
"files": [
|
||||
"dist/*",
|
||||
"package.json",
|
||||
"README.md"
|
||||
],
|
||||
"bundleDependencies": [
|
||||
"fetch-retry"
|
||||
]
|
||||
}
|
||||
32  extensions/inference-martian-extension/resources/models.json  Normal file
@ -0,0 +1,32 @@
|
||||
[
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"url": "https://withmartian.com/"
|
||||
}
|
||||
],
|
||||
"id": "router",
|
||||
"object": "model",
|
||||
"name": "Martian Model Router",
|
||||
"version": "1.0",
|
||||
"description": "Martian Model Router dynamically routes requests to the best LLM in real-time",
|
||||
"format": "api",
|
||||
"settings": {},
|
||||
"parameters": {
|
||||
"max_tokens": 4096,
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
"metadata": {
|
||||
"author": "Martian",
|
||||
"tags": [
|
||||
"General"
|
||||
]
|
||||
},
|
||||
"engine": "martian"
|
||||
}
|
||||
]
|
||||
@ -0,0 +1,23 @@
|
||||
[
|
||||
{
|
||||
"key": "chat-completions-endpoint",
|
||||
"title": "Chat Completions Endpoint",
|
||||
"description": "The endpoint to use for chat completions. See the [Martian API documentation](https://docs.withmartian.com/martian-model-router/api-reference/get-chat-completions) for more information.",
|
||||
"controllerType": "input",
|
||||
"controllerProps": {
|
||||
"placeholder": "https://withmartian.com/api/openai/v1/chat/completions",
|
||||
"value": "https://withmartian.com/api/openai/v1/chat/completions"
|
||||
}
|
||||
},
|
||||
{
|
||||
"key": "martian-api-key",
|
||||
"title": "API Key",
|
||||
"description": "The Martian API uses API keys for authentication. Visit your [API Keys](https://withmartian.com/dashboard) page to retrieve the API key you'll use in your requests.",
|
||||
"controllerType": "input",
|
||||
"controllerProps": {
|
||||
"placeholder": "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
|
||||
"value": "",
|
||||
"type": "password"
|
||||
}
|
||||
}
|
||||
]
|
||||
66  extensions/inference-martian-extension/src/index.ts  Normal file
@ -0,0 +1,66 @@
/**
 * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 * @version 1.0.0
 * @module inference-martian-extension/src/index
 */

import { RemoteOAIEngine, SettingComponentProps } from '@janhq/core'

declare const SETTINGS: Array<any>
declare const MODELS: Array<any>

enum Settings {
  apiKey = 'martian-api-key',
  chatCompletionsEndPoint = 'chat-completions-endpoint',
}

/**
 * A class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 */
export default class JanInferenceMartianExtension extends RemoteOAIEngine {
  inferenceUrl: string = ''
  provider: string = 'martian'

  override async onLoad(): Promise<void> {
    super.onLoad()

    // Register Settings
    this.registerSettings(SETTINGS)
    this.registerModels(MODELS)

    this.apiKey = await this.getSetting<string>(Settings.apiKey, '')
    this.inferenceUrl = await this.getSetting<string>(
      Settings.chatCompletionsEndPoint,
      ''
    )
    if (this.inferenceUrl.length === 0) {
      SETTINGS.forEach((setting) => {
        if (setting.key === Settings.chatCompletionsEndPoint) {
          this.inferenceUrl = setting.controllerProps.value as string
        }
      })
    }
  }

  onSettingUpdate<T>(key: string, value: T): void {
    if (key === Settings.apiKey) {
      this.apiKey = value as string
    } else if (key === Settings.chatCompletionsEndPoint) {
      if (typeof value !== 'string') return

      if (value.trim().length === 0) {
        SETTINGS.forEach((setting) => {
          if (setting.key === Settings.chatCompletionsEndPoint) {
            this.inferenceUrl = setting.controllerProps.value as string
          }
        })
      } else {
        this.inferenceUrl = value
      }
    }
  }
}

14
extensions/inference-martian-extension/tsconfig.json
Normal file
14
extensions/inference-martian-extension/tsconfig.json
Normal file
@ -0,0 +1,14 @@
{
  "compilerOptions": {
    "target": "es2016",
    "module": "ES6",
    "moduleResolution": "node",
    "outDir": "./dist",
    "esModuleInterop": true,
    "forceConsistentCasingInFileNames": true,
    "strict": false,
    "skipLibCheck": true,
    "rootDir": "./src"
  },
  "include": ["./src"]
}

37
extensions/inference-martian-extension/webpack.config.js
Normal file
37
extensions/inference-martian-extension/webpack.config.js
Normal file
@ -0,0 +1,37 @@
const webpack = require('webpack')
const packageJson = require('./package.json')
const settingJson = require('./resources/settings.json')
const modelsJson = require('./resources/models.json')

module.exports = {
  experiments: { outputModule: true },
  entry: './src/index.ts', // Adjust the entry point to match your project's main file
  mode: 'production',
  module: {
    rules: [
      {
        test: /\.tsx?$/,
        use: 'ts-loader',
        exclude: /node_modules/,
      },
    ],
  },
  plugins: [
    new webpack.DefinePlugin({
      MODELS: JSON.stringify(modelsJson),
      SETTINGS: JSON.stringify(settingJson),
      ENGINE: JSON.stringify(packageJson.engine),
    }),
  ],
  output: {
    filename: 'index.js', // Adjust the output file name as needed
    library: { type: 'module' }, // Specify ESM output format
  },
  resolve: {
    extensions: ['.ts', '.js'],
  },
  optimization: {
    minimize: false,
  },
  // Add loaders and other configuration as needed for your project
}

@ -1,7 +1,7 @@
{
  "name": "@janhq/inference-mistral-extension",
  "productName": "MistralAI Inference Engine",
  "version": "1.0.0",
  "version": "1.0.1",
  "description": "This extension enables Mistral chat completion API calls",
  "main": "dist/index.js",
  "module": "dist/module.js",

@ -8,48 +8,20 @@
|
||||
"id": "mistral-small-latest",
|
||||
"object": "model",
|
||||
"name": "Mistral Small",
|
||||
"version": "1.0",
|
||||
"description": "Mistral Small is the ideal choice for simpe tasks that one can do in builk - like Classification, Customer Support, or Text Generation. It offers excellent performance at an affordable price point.",
|
||||
"version": "1.1",
|
||||
"description": "Mistral Small is the ideal choice for simple tasks (Classification, Customer Support, or Text Generation) at an affordable price.",
|
||||
"format": "api",
|
||||
"settings": {},
|
||||
"parameters": {
|
||||
"max_tokens": 4096,
|
||||
"temperature": 0.7
|
||||
"max_tokens": 32000,
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true
|
||||
},
|
||||
"metadata": {
|
||||
"author": "Mistral",
|
||||
"tags": [
|
||||
"Classification",
|
||||
"Customer Support",
|
||||
"Text Generation"
|
||||
]
|
||||
},
|
||||
"engine": "mistral"
|
||||
},
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"url": "https://docs.mistral.ai/api/"
|
||||
}
|
||||
],
|
||||
"id": "mistral-medium-latest",
|
||||
"object": "model",
|
||||
"name": "Mistral Medium",
|
||||
"version": "1.0",
|
||||
"description": "Mistral Medium is the ideal for intermediate tasks that require moderate reasoning - like Data extraction, Summarizing a Document, Writing a Job Description, or Writing Product Descriptions. Mistral Medium strikes a balance between performance and capability, making it suitable for a wide range of tasks that only require language transformaion",
|
||||
"format": "api",
|
||||
"settings": {},
|
||||
"parameters": {
|
||||
"max_tokens": 4096,
|
||||
"temperature": 0.7
|
||||
},
|
||||
"metadata": {
|
||||
"author": "Mistral",
|
||||
"tags": [
|
||||
"Data extraction",
|
||||
"Summarizing a Document",
|
||||
"Writing a Job Description",
|
||||
"Writing Product Descriptions"
|
||||
"General"
|
||||
]
|
||||
},
|
||||
"engine": "mistral"
|
||||
@ -63,21 +35,47 @@
|
||||
"id": "mistral-large-latest",
|
||||
"object": "model",
|
||||
"name": "Mistral Large",
|
||||
"version": "1.0",
|
||||
"description": "Mistral Large is ideal for complex tasks that require large reasoning capabilities or are highly specialized - like Synthetic Text Generation, Code Generation, RAG, or Agents.",
|
||||
"version": "1.1",
|
||||
"description": "Mistral Large is ideal for complex tasks (Synthetic Text Generation, Code Generation, RAG, or Agents).",
|
||||
"format": "api",
|
||||
"settings": {},
|
||||
"parameters": {
|
||||
"max_tokens": 4096,
|
||||
"temperature": 0.7
|
||||
"max_tokens": 32000,
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true
|
||||
},
|
||||
"metadata": {
|
||||
"author": "Mistral",
|
||||
"tags": [
|
||||
"Text Generation",
|
||||
"Code Generation",
|
||||
"RAG",
|
||||
"Agents"
|
||||
"General"
|
||||
]
|
||||
},
|
||||
"engine": "mistral"
|
||||
},
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"url": "https://docs.mistral.ai/api/"
|
||||
}
|
||||
],
|
||||
"id": "open-mixtral-8x22b",
|
||||
"object": "model",
|
||||
"name": "Mixtral 8x22B",
|
||||
"version": "1.1",
|
||||
"description": "Mixtral 8x22B is a high-performance, cost-effective model designed for complex tasks.",
|
||||
"format": "api",
|
||||
"settings": {},
|
||||
"parameters": {
|
||||
"max_tokens": 32000,
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true
|
||||
},
|
||||
"metadata": {
|
||||
"author": "Mistral",
|
||||
"tags": [
|
||||
"General"
|
||||
]
|
||||
},
|
||||
"engine": "mistral"
|
||||
|
||||
2
extensions/inference-nitro-extension/.gitignore
vendored
Normal file
2
extensions/inference-nitro-extension/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
bin
!version.txt
@ -1 +1 @@
0.3.22
0.4.4

@ -1,3 +1,3 @@
|
||||
@echo off
|
||||
set /p NITRO_VERSION=<./bin/version.txt
|
||||
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan
|
||||
set /p CORTEX_VERSION=<./bin/version.txt
|
||||
.\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
{
|
||||
"name": "@janhq/inference-nitro-extension",
|
||||
"productName": "Nitro Inference Engine",
|
||||
"version": "1.0.4",
|
||||
"description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
|
||||
"name": "@janhq/inference-cortex-extension",
|
||||
"productName": "Cortex Inference Engine",
|
||||
"version": "1.0.7",
|
||||
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
|
||||
"main": "dist/index.js",
|
||||
"node": "dist/node/index.cjs.js",
|
||||
"author": "Jan <service@jan.ai>",
|
||||
@ -10,8 +10,8 @@
|
||||
"scripts": {
|
||||
"test": "jest",
|
||||
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
|
||||
"downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro",
|
||||
"downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-universal.tar.gz -o ./bin/ && mkdir -p ./bin/mac-universal && tar -zxvf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz --strip-components=1 -C ./bin/mac-universal && rm -rf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz && chmod +x ./bin/mac-universal/nitro",
|
||||
"downloadnitro:linux": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/cortex-cpp",
|
||||
"downloadnitro:darwin": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-arm64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz --strip-components=1 -C ./bin/mac-arm64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz && chmod +x ./bin/mac-arm64/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-amd64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz --strip-components=1 -C ./bin/mac-amd64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz && chmod +x ./bin/mac-amd64/cortex-cpp",
|
||||
"downloadnitro:win32": "download.bat",
|
||||
"downloadnitro": "run-script-os",
|
||||
"build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
|
||||
|
||||
@ -8,19 +8,20 @@
|
||||
"id": "codeninja-1.0-7b",
|
||||
"object": "model",
|
||||
"name": "CodeNinja 7B Q4",
|
||||
"version": "1.0",
|
||||
"version": "1.1",
|
||||
"description": "CodeNinja is good for coding tasks and can handle various languages including Python, C, C++, Rust, Java, JavaScript, and more.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"ctx_len": 8192,
|
||||
"prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
|
||||
"llama_model_path": "codeninja-1.0-openchat-7b.Q4_K_M.gguf"
|
||||
"llama_model_path": "codeninja-1.0-openchat-7b.Q4_K_M.gguf",
|
||||
"ngl": 32
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 4096,
|
||||
"max_tokens": 8192,
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
|
||||
@ -8,19 +8,20 @@
|
||||
"id": "command-r-34b",
|
||||
"object": "model",
|
||||
"name": "Command-R v01 34B Q4",
|
||||
"version": "1.3",
|
||||
"version": "1.4",
|
||||
"description": "C4AI Command-R developed by CohereAI is optimized for a variety of use cases including reasoning, summarization, and question answering.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"ctx_len": 131072,
|
||||
"prompt_template": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
|
||||
"llama_model_path": "c4ai-command-r-v01-Q4_K_M.gguf"
|
||||
"llama_model_path": "c4ai-command-r-v01-Q4_K_M.gguf",
|
||||
"ngl": 40
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 4096,
|
||||
"max_tokens": 131072,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
|
||||
@ -8,19 +8,20 @@
|
||||
"id": "deepseek-coder-1.3b",
|
||||
"object": "model",
|
||||
"name": "Deepseek Coder 1.3B Q8",
|
||||
"version": "1.0",
|
||||
"version": "1.1",
|
||||
"description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"ctx_len": 16384,
|
||||
"prompt_template": "### Instruction:\n{prompt}\n### Response:",
|
||||
"llama_model_path": "deepseek-coder-1.3b-instruct.Q8_0.gguf"
|
||||
"llama_model_path": "deepseek-coder-1.3b-instruct.Q8_0.gguf",
|
||||
"ngl": 24
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 4096,
|
||||
"max_tokens": 16384,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
|
||||
@ -1,26 +1,27 @@
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"filename": "deepseek-coder-33b-instruct.Q5_K_M.gguf",
|
||||
"url": "https://huggingface.co/TheBloke/deepseek-coder-33B-instruct-GGUF/resolve/main/deepseek-coder-33b-instruct.Q5_K_M.gguf"
|
||||
"filename": "deepseek-coder-33b-instruct.Q4_K_M.gguf",
|
||||
"url": "https://huggingface.co/TheBloke/deepseek-coder-33B-instruct-GGUF/resolve/main/deepseek-coder-33b-instruct.Q4_K_M.gguf"
|
||||
}
|
||||
],
|
||||
"id": "deepseek-coder-34b",
|
||||
"object": "model",
|
||||
"name": "Deepseek Coder 33B Q5",
|
||||
"version": "1.0",
|
||||
"name": "Deepseek Coder 33B Q4",
|
||||
"version": "1.1",
|
||||
"description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"ctx_len": 16384,
|
||||
"prompt_template": "### Instruction:\n{prompt}\n### Response:",
|
||||
"llama_model_path": "deepseek-coder-33b-instruct.Q5_K_M.gguf"
|
||||
"llama_model_path": "deepseek-coder-33b-instruct.Q4_K_M.gguf",
|
||||
"ngl": 62
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 4096,
|
||||
"max_tokens": 16384,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
|
||||
@ -1,32 +0,0 @@
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"url": "https://huggingface.co/TheBloke/dolphin-2_6-phi-2-GGUF/resolve/main/dolphin-2_6-phi-2.Q8_0.gguf",
|
||||
"filename": "dolphin-2_6-phi-2.Q8_0.gguf"
|
||||
}
|
||||
],
|
||||
"id": "dolphin-phi-2",
|
||||
"object": "model",
|
||||
"name": "Dolphin Phi-2 2.7B Q8",
|
||||
"version": "1.0",
|
||||
"description": "Dolphin Phi-2 is a good alternative for Phi-2 in chatting",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
|
||||
"llama_model_path": "dolphin-2_6-phi-2.Q8_0.gguf"
|
||||
},
|
||||
"parameters": {
|
||||
"max_tokens": 4096,
|
||||
"stop": ["<|im_end|>"]
|
||||
},
|
||||
"metadata": {
|
||||
"author": "Cognitive Computations, Microsoft",
|
||||
"tags": [
|
||||
"3B",
|
||||
"Finetuned"
|
||||
],
|
||||
"size": 2960000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
}
|
||||
@ -8,19 +8,20 @@
|
||||
"id": "gemma-2b",
|
||||
"object": "model",
|
||||
"name": "Gemma 2B Q4",
|
||||
"version": "1.0",
|
||||
"version": "1.1",
|
||||
"description": "Gemma is built from the same technology with Google's Gemini.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"ctx_len": 8192,
|
||||
"prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model",
|
||||
"llama_model_path": "gemma-2b-it-q4_k_m.gguf"
|
||||
"llama_model_path": "gemma-2b-it-q4_k_m.gguf",
|
||||
"ngl": 18
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 4096,
|
||||
"max_tokens": 8192,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
|
||||
@ -8,19 +8,20 @@
|
||||
"id": "gemma-7b",
|
||||
"object": "model",
|
||||
"name": "Gemma 7B Q4",
|
||||
"version": "1.0",
|
||||
"version": "1.1",
|
||||
"description": "Google's Gemma is built for multilingual purpose",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"ctx_len": 8192,
|
||||
"prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model",
|
||||
"llama_model_path": "gemma-7b-it-q4_K_M.gguf"
|
||||
"llama_model_path": "gemma-7b-it-q4_K_M.gguf",
|
||||
"ngl": 28
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 4096,
|
||||
"max_tokens": 8192,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
|
||||
@ -14,7 +14,8 @@
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]",
|
||||
"llama_model_path": "llama-2-70b-chat.Q4_K_M.gguf"
|
||||
"llama_model_path": "llama-2-70b-chat.Q4_K_M.gguf",
|
||||
"ngl": 80
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
|
||||
@ -14,7 +14,8 @@
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]",
|
||||
"llama_model_path": "llama-2-7b-chat.Q4_K_M.gguf"
|
||||
"llama_model_path": "llama-2-7b-chat.Q4_K_M.gguf",
|
||||
"ngl": 32
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
|
||||
@ -8,19 +8,20 @@
|
||||
"id": "llama3-8b-instruct",
|
||||
"object": "model",
|
||||
"name": "Llama 3 8B Q4",
|
||||
"version": "1.0",
|
||||
"version": "1.1",
|
||||
"description": "Meta's Llama 3 excels at general usage situations, including chat, general world knowledge, and coding.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 8192,
|
||||
"prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
|
||||
"llama_model_path": "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf"
|
||||
"llama_model_path": "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
|
||||
"ngl": 32
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 4096,
|
||||
"max_tokens": 8192,
|
||||
"stop": ["<|end_of_text|>","<|eot_id|>"],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
|
||||
@ -1,35 +1,38 @@
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"filename": "Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf",
|
||||
"url": "https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/resolve/main/Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf"
|
||||
"filename": "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf",
|
||||
"url": "https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf"
|
||||
}
|
||||
],
|
||||
"id": "hermes-pro-7b",
|
||||
"id": "llama3-hermes-8b",
|
||||
"object": "model",
|
||||
"name": "Hermes Pro 7B Q4",
|
||||
"name": "Hermes Pro Llama 3 8B Q4",
|
||||
"version": "1.1",
|
||||
"description": "Hermes Pro is superior in Roleplaying, Reasoning and Explaining problem.",
|
||||
"description": "Hermes Pro is well-designed for General chat and JSON output.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"ctx_len": 8192,
|
||||
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
|
||||
"llama_model_path": "Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf"
|
||||
"llama_model_path": "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf",
|
||||
"ngl": 32
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 4096,
|
||||
"max_tokens": 8192,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
"metadata": {
|
||||
"author": "NousResearch",
|
||||
"tags": ["7B", "Finetuned"],
|
||||
"size": 4370000000
|
||||
"tags": [
|
||||
"7B",
|
||||
"Finetuned"
|
||||
],
|
||||
"size": 4920000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
}
|
||||
|
||||
@ -14,7 +14,8 @@
|
||||
"settings": {
|
||||
"ctx_len": 2048,
|
||||
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
|
||||
"llama_model_path": "llamacorn-1.1b-chat.Q8_0.gguf"
|
||||
"llama_model_path": "llamacorn-1.1b-chat.Q8_0.gguf",
|
||||
"ngl": 22
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
|
||||
@ -1,34 +0,0 @@
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"filename": "miqu-1-70b.q4_k_m.gguf",
|
||||
"url": "https://huggingface.co/miqudev/miqu-1-70b/resolve/main/miqu-1-70b.q4_k_m.gguf"
|
||||
}
|
||||
],
|
||||
"id": "miqu-70b",
|
||||
"object": "model",
|
||||
"name": "Mistral 70B Q4",
|
||||
"version": "1.0",
|
||||
"description": "A leak weight of Mistral 70B model.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"prompt_template": "[INST] {prompt} [/INST]",
|
||||
"llama_model_path": "miqu-1-70b.q4_k_m.gguf"
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 4096,
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
"metadata": {
|
||||
"author": "miqudev",
|
||||
"tags": ["70B", "Foundational Model"],
|
||||
"size": 26440000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
}
|
||||
|
||||
@ -8,20 +8,21 @@
|
||||
"id": "mistral-ins-7b-q4",
|
||||
"object": "model",
|
||||
"name": "Mistral Instruct 7B Q4",
|
||||
"version": "1.0",
|
||||
"version": "1.1",
|
||||
"description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding of the world.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"ctx_len": 32768,
|
||||
"prompt_template": "[INST] {prompt} [/INST]",
|
||||
"llama_model_path": "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
|
||||
"llama_model_path": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
|
||||
"ngl": 32
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 4096,
|
||||
"stop": [],
|
||||
"max_tokens": 32768,
|
||||
"stop": ["[/INST]"],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
|
||||
@ -8,19 +8,20 @@
|
||||
"id": "mixtral-8x7b-instruct",
|
||||
"object": "model",
|
||||
"name": "Mixtral 8x7B Instruct Q4",
|
||||
"version": "1.0",
|
||||
"version": "1.1",
|
||||
"description": "The Mixtral-8x7B is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms 70B models on most benchmarks.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"ctx_len": 32768,
|
||||
"prompt_template": "[INST] {prompt} [/INST]",
|
||||
"llama_model_path": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf"
|
||||
"llama_model_path": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf",
|
||||
"ngl": 100
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 4096,
|
||||
"max_tokens": 32768,
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
|
||||
@ -8,19 +8,20 @@
|
||||
"id": "noromaid-7b",
|
||||
"object": "model",
|
||||
"name": "Noromaid 7B Q4",
|
||||
"version": "1.0",
|
||||
"version": "1.1",
|
||||
"description": "The Noromaid 7b model is designed for role-playing with human-like behavior.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"ctx_len": 32768,
|
||||
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
|
||||
"llama_model_path": "Noromaid-7B-0.4-DPO.q4_k_m.gguf"
|
||||
"llama_model_path": "Noromaid-7B-0.4-DPO.q4_k_m.gguf",
|
||||
"ngl": 32
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 4096,
|
||||
"max_tokens": 32768,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
|
||||
@ -8,19 +8,20 @@
|
||||
"id": "openchat-3.5-7b",
|
||||
"object": "model",
|
||||
"name": "Openchat-3.5 7B Q4",
|
||||
"version": "1.0",
|
||||
"version": "1.1",
|
||||
"description": "The performance of Openchat surpasses ChatGPT-3.5 and Grok-1 across various benchmarks.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"ctx_len": 8192,
|
||||
"prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
|
||||
"llama_model_path": "openchat-3.5-0106.Q4_K_M.gguf"
|
||||
"llama_model_path": "openchat-3.5-0106.Q4_K_M.gguf",
|
||||
"ngl": 32
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 4096,
|
||||
"max_tokens": 8192,
|
||||
"stop": ["<|end_of_turn|>"],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
|
||||
@ -1,34 +0,0 @@
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"filename": "openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf",
|
||||
"url": "https://huggingface.co/janhq/openhermes-2.5-neural-chat-v3-3-slerp-GGUF/resolve/main/openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf"
|
||||
}
|
||||
],
|
||||
"id": "openhermes-neural-7b",
|
||||
"object": "model",
|
||||
"name": "OpenHermes Neural 7B Q4",
|
||||
"version": "1.1",
|
||||
"description": "OpenHermes Neural is a merged model using the TIES method. It performs well in various benchmarks.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
|
||||
"llama_model_path": "openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf"
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 4096,
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
"metadata": {
|
||||
"author": "Intel, Jan",
|
||||
"tags": ["7B", "Merged"],
|
||||
"size": 4370000000,
|
||||
"cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/openhermes-neural-7b/cover.png"
|
||||
},
|
||||
"engine": "nitro"
|
||||
}
|
||||
@ -13,7 +13,7 @@
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"prompt_template": "<|system|>\n{system_message}<|end|>\n<|user|>\n{prompt}<|end|>\n<|assistant|>\n",
|
||||
"prompt_template": "<|user|>\n{prompt}<|end|>\n<|assistant|>\n",
|
||||
"llama_model_path": "Phi-3-mini-4k-instruct-q4.gguf"
|
||||
},
|
||||
"parameters": {
|
||||
|
||||
@ -8,19 +8,20 @@
|
||||
"id": "phind-34b",
|
||||
"object": "model",
|
||||
"name": "Phind 34B Q4",
|
||||
"version": "1.1",
|
||||
"version": "1.2",
|
||||
"description": "Phind 34B is the best Open-source coding model.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"ctx_len": 16384,
|
||||
"prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant",
|
||||
"llama_model_path": "phind-codellama-34b-v2.Q4_K_M.gguf"
|
||||
"llama_model_path": "phind-codellama-34b-v2.Q4_K_M.gguf",
|
||||
"ngl": 48
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 4096,
|
||||
"max_tokens": 16384,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
|
||||
@ -8,19 +8,20 @@
|
||||
"id": "qwen-7b",
|
||||
"object": "model",
|
||||
"name": "Qwen Chat 7B Q4",
|
||||
"version": "1.0",
|
||||
"version": "1.1",
|
||||
"description": "Qwen is optimized at Chinese, ideal for everyday tasks.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"ctx_len": 32768,
|
||||
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
|
||||
"llama_model_path": "qwen1_5-7b-chat-q4_k_m.gguf"
|
||||
"llama_model_path": "qwen1_5-7b-chat-q4_k_m.gguf",
|
||||
"ngl": 32
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 4096,
|
||||
"max_tokens": 32768,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
|
||||
@ -14,7 +14,8 @@
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"prompt_template": "<|user|>\n{prompt}<|endoftext|>\n<|assistant|>",
|
||||
"llama_model_path": "stablelm-zephyr-3b.Q8_0.gguf"
|
||||
"llama_model_path": "stablelm-zephyr-3b.Q8_0.gguf",
|
||||
"ngl": 32
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
|
||||
@ -12,15 +12,16 @@
|
||||
"description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"ctx_len": 32768,
|
||||
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
|
||||
"llama_model_path": "stealth-v1.3.Q4_K_M.gguf"
|
||||
"llama_model_path": "stealth-v1.3.Q4_K_M.gguf",
|
||||
"ngl": 32
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 4096,
|
||||
"max_tokens": 32768,
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
|
||||
@ -14,7 +14,8 @@
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>",
|
||||
"llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
|
||||
"llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
|
||||
"ngl": 22
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
|
||||
@ -12,15 +12,16 @@
|
||||
"description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"ctx_len": 32768,
|
||||
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
|
||||
"llama_model_path": "trinity-v1.2.Q4_K_M.gguf"
|
||||
"llama_model_path": "trinity-v1.2.Q4_K_M.gguf",
|
||||
"ngl": 32
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 4096,
|
||||
"max_tokens": 32768,
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
|
||||
@ -8,19 +8,20 @@
|
||||
"id": "vistral-7b",
|
||||
"object": "model",
|
||||
"name": "Vistral 7B Q4",
|
||||
"version": "1.0",
|
||||
"version": "1.1",
|
||||
"description": "Vistral 7B has a deep understanding of Vietnamese.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"ctx_len": 32768,
|
||||
"prompt_template": "[INST] <<SYS>>\n{system_message}\n<</SYS>>\n{prompt} [/INST]",
|
||||
"llama_model_path": "vistral-7b-chat-dpo.Q4_K_M.gguf"
|
||||
"llama_model_path": "vistral-7b-chat-dpo.Q4_K_M.gguf",
|
||||
"ngl": 32
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 4096,
|
||||
"max_tokens": 32768,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
|
||||
@ -12,15 +12,16 @@
|
||||
"description": "WizardCoder 13B is a Python coding model. This model demonstrate high proficiency in specific domains like coding and mathematics.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"ctx_len": 16384,
|
||||
"prompt_template": "### Instruction:\n{prompt}\n### Response:",
|
||||
"llama_model_path": "wizardcoder-python-13b-v1.0.Q4_K_M.gguf"
|
||||
"llama_model_path": "wizardcoder-python-13b-v1.0.Q4_K_M.gguf",
|
||||
"ngl": 40
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 4096,
|
||||
"max_tokens": 16384,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
|
||||
@ -14,7 +14,8 @@
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
|
||||
"llama_model_path": "yi-34b-chat.Q4_K_M.gguf"
|
||||
"llama_model_path": "yi-34b-chat.Q4_K_M.gguf",
|
||||
"ngl": 60
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
|
||||
@ -12,21 +12,17 @@ const codeninja7bJson = require('./resources/models/codeninja-1.0-7b/model.json'
|
||||
const commandr34bJson = require('./resources/models/command-r-34b/model.json')
|
||||
const deepseekCoder13bJson = require('./resources/models/deepseek-coder-1.3b/model.json')
|
||||
const deepseekCoder34bJson = require('./resources/models/deepseek-coder-34b/model.json')
|
||||
const dolphinPhi2Json = require('./resources/models/dolphin-phi-2/model.json')
|
||||
const gemma2bJson = require('./resources/models/gemma-2b/model.json')
|
||||
const gemma7bJson = require('./resources/models/gemma-7b/model.json')
|
||||
const hermesPro7bJson = require('./resources/models/hermes-pro-7b/model.json')
|
||||
const llama2Chat70bJson = require('./resources/models/llama2-chat-70b/model.json')
|
||||
const llama2Chat7bJson = require('./resources/models/llama2-chat-7b/model.json')
|
||||
const llamacorn1bJson = require('./resources/models/llamacorn-1.1b/model.json')
|
||||
const llava13bJson = require('./resources/models/llava-13b/model.json')
|
||||
const llava7bJson = require('./resources/models/llava-7b/model.json')
|
||||
const miqu70bJson = require('./resources/models/miqu-70b/model.json')
|
||||
const mistralIns7bq4Json = require('./resources/models/mistral-ins-7b-q4/model.json')
|
||||
const mixtral8x7bInstructJson = require('./resources/models/mixtral-8x7b-instruct/model.json')
|
||||
const noromaid7bJson = require('./resources/models/noromaid-7b/model.json')
|
||||
const openchat357bJson = require('./resources/models/openchat-3.5-7b/model.json')
|
||||
const openhermesNeural7bJson = require('./resources/models/openhermes-neural-7b/model.json')
|
||||
const phind34bJson = require('./resources/models/phind-34b/model.json')
|
||||
const qwen7bJson = require('./resources/models/qwen-7b/model.json')
|
||||
const stableZephyr3bJson = require('./resources/models/stable-zephyr-3b/model.json')
|
||||
@ -37,6 +33,7 @@ const vistral7bJson = require('./resources/models/vistral-7b/model.json')
|
||||
const wizardcoder13bJson = require('./resources/models/wizardcoder-13b/model.json')
|
||||
const yi34bJson = require('./resources/models/yi-34b/model.json')
|
||||
const llama3Json = require('./resources/models/llama3-8b-instruct/model.json')
|
||||
const llama3Hermes8bJson = require('./resources/models/llama3-hermes-8b/model.json')
|
||||
|
||||
export default [
|
||||
{
|
||||
@ -56,21 +53,17 @@ export default [
|
||||
commandr34bJson,
|
||||
deepseekCoder13bJson,
|
||||
deepseekCoder34bJson,
|
||||
dolphinPhi2Json,
|
||||
gemma2bJson,
|
||||
gemma7bJson,
|
||||
hermesPro7bJson,
|
||||
llama2Chat70bJson,
|
||||
llama2Chat7bJson,
|
||||
llamacorn1bJson,
|
||||
llava13bJson,
|
||||
llava7bJson,
|
||||
miqu70bJson,
|
||||
mistralIns7bq4Json,
|
||||
mixtral8x7bInstructJson,
|
||||
noromaid7bJson,
|
||||
openchat357bJson,
|
||||
openhermesNeural7bJson,
|
||||
phind34bJson,
|
||||
qwen7bJson,
|
||||
stableZephyr3bJson,
|
||||
@ -80,13 +73,14 @@ export default [
|
||||
vistral7bJson,
|
||||
wizardcoder13bJson,
|
||||
yi34bJson,
|
||||
llama3Json
|
||||
llama3Json,
|
||||
llama3Hermes8bJson
|
||||
]),
|
||||
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
|
||||
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
|
||||
INFERENCE_URL: JSON.stringify(
|
||||
process.env.INFERENCE_URL ||
|
||||
'http://127.0.0.1:3928/inferences/llamacpp/chat_completion'
|
||||
'http://127.0.0.1:3928/inferences/server/chat_completion'
|
||||
),
|
||||
TROUBLESHOOTING_URL: JSON.stringify(
|
||||
'https://jan.ai/guides/troubleshooting'
|
||||
|
||||
@ -130,7 +130,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
|
||||
const executableFolderPath = await joinPath([
|
||||
janDataFolderPath,
|
||||
'engines',
|
||||
this.name ?? 'nitro',
|
||||
this.name ?? 'cortex-cpp',
|
||||
this.version ?? '1.0.0',
|
||||
])
|
||||
|
||||
@ -179,7 +179,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
|
||||
const executableFolderPath = await joinPath([
|
||||
janDataFolderPath,
|
||||
'engines',
|
||||
this.name ?? 'nitro',
|
||||
this.name ?? 'cortex-cpp',
|
||||
this.version ?? '1.0.0',
|
||||
])
|
||||
|
||||
|
||||
@ -33,9 +33,22 @@ describe('test executable nitro file', () => {
|
||||
Object.defineProperty(process, 'platform', {
|
||||
value: 'darwin',
|
||||
})
|
||||
Object.defineProperty(process, 'arch', {
|
||||
value: 'arm64',
|
||||
})
|
||||
expect(executableNitroFile(testSettings)).toEqual(
|
||||
expect.objectContaining({
|
||||
executablePath: expect.stringContaining(`mac-universal${sep}nitro`),
|
||||
executablePath: expect.stringContaining(`mac-arm64${sep}cortex-cpp`),
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
)
|
||||
Object.defineProperty(process, 'arch', {
|
||||
value: 'amd64',
|
||||
})
|
||||
expect(executableNitroFile(testSettings)).toEqual(
|
||||
expect.objectContaining({
|
||||
executablePath: expect.stringContaining(`mac-amd64${sep}cortex-cpp`),
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
@ -56,7 +69,7 @@ describe('test executable nitro file', () => {
|
||||
}
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
executablePath: expect.stringContaining(`win-cpu${sep}nitro.exe`),
|
||||
executablePath: expect.stringContaining(`win-cpu${sep}cortex-cpp.exe`),
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
@ -89,7 +102,7 @@ describe('test executable nitro file', () => {
|
||||
}
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
executablePath: expect.stringContaining(`win-cuda-11-7${sep}nitro.exe`),
|
||||
executablePath: expect.stringContaining(`win-cuda-11-7${sep}cortex-cpp.exe`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
@ -122,7 +135,7 @@ describe('test executable nitro file', () => {
|
||||
}
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
executablePath: expect.stringContaining(`win-cuda-12-0${sep}nitro.exe`),
|
||||
executablePath: expect.stringContaining(`win-cuda-12-0${sep}cortex-cpp.exe`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
@ -139,7 +152,7 @@ describe('test executable nitro file', () => {
|
||||
}
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
executablePath: expect.stringContaining(`linux-cpu${sep}nitro`),
|
||||
executablePath: expect.stringContaining(`linux-cpu${sep}cortex-cpp`),
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
@ -172,7 +185,7 @@ describe('test executable nitro file', () => {
|
||||
}
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
executablePath: expect.stringContaining(`linux-cuda-11-7${sep}nitro`),
|
||||
executablePath: expect.stringContaining(`linux-cuda-11-7${sep}cortex-cpp`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
@ -205,7 +218,7 @@ describe('test executable nitro file', () => {
|
||||
}
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
executablePath: expect.stringContaining(`linux-cuda-12-0${sep}nitro`),
|
||||
executablePath: expect.stringContaining(`linux-cuda-12-0${sep}cortex-cpp`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import { GpuSetting, SystemInformation } from '@janhq/core'
|
||||
import { GpuSetting } from '@janhq/core'
|
||||
import * as path from 'path'
|
||||
|
||||
export interface NitroExecutableOptions {
|
||||
@ -24,7 +24,7 @@ const os = (): string => {
|
||||
return process.platform === 'win32'
|
||||
? 'win'
|
||||
: process.platform === 'darwin'
|
||||
? 'mac-universal'
|
||||
? process.arch === 'arm64' ? 'mac-arm64' : 'mac-amd64'
|
||||
: 'linux'
|
||||
}
|
||||
|
||||
@ -52,7 +52,7 @@ export const executableNitroFile = (
|
||||
.join('-')
|
||||
let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
|
||||
let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
|
||||
let binaryName = `nitro${extension()}`
|
||||
let binaryName = `cortex-cpp${extension()}`
|
||||
|
||||
return {
|
||||
executablePath: path.join(__dirname, '..', 'bin', binaryFolder, binaryName),
|
||||
|
||||
@ -34,9 +34,9 @@ const LOCAL_HOST = '127.0.0.1'
|
||||
// The URL for the Nitro subprocess
|
||||
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`
|
||||
// The URL for the Nitro subprocess to load a model
|
||||
const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`
|
||||
const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
|
||||
// The URL for the Nitro subprocess to validate a model
|
||||
const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`
|
||||
const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
|
||||
// The URL for the Nitro subprocess to kill itself
|
||||
const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
|
||||
|
||||
@ -50,7 +50,7 @@ const SUPPORTED_MODEL_FORMAT = '.gguf'
|
||||
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
|
||||
|
||||
// The current model settings
|
||||
let currentSettings: ModelSettingParams | undefined = undefined
|
||||
let currentSettings: ModelSettingParams & { model?: string } | undefined = undefined
|
||||
|
||||
/**
|
||||
* Stops a Nitro subprocess.
|
||||
@ -77,7 +77,7 @@ async function loadModel(
|
||||
}
|
||||
|
||||
if (params.model.engine !== InferenceEngine.nitro) {
|
||||
return Promise.reject('Not a nitro model')
|
||||
return Promise.reject('Not a cortex model')
|
||||
} else {
|
||||
const nitroResourceProbe = await getSystemResourceInfo()
|
||||
// Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
|
||||
@ -135,6 +135,7 @@ async function loadModel(
|
||||
// model.settings can override the default settings
|
||||
...params.model.settings,
|
||||
llama_model_path,
|
||||
model: params.model.id,
|
||||
// This is critical and requires real CPU physical core count (or performance core)
|
||||
...(params.model.settings.mmproj && {
|
||||
mmproj: path.isAbsolute(params.model.settings.mmproj)
|
||||
@ -142,7 +143,7 @@ async function loadModel(
|
||||
: path.join(modelFolder, params.model.settings.mmproj),
|
||||
}),
|
||||
}
|
||||
return runNitroAndLoadModel(systemInfo)
|
||||
return runNitroAndLoadModel(params.model.id, systemInfo)
|
||||
}
|
||||
}
|
||||
|
||||
@ -152,7 +153,7 @@ async function loadModel(
|
||||
* 3. Validate model status
|
||||
* @returns
|
||||
*/
|
||||
async function runNitroAndLoadModel(systemInfo?: SystemInformation) {
|
||||
async function runNitroAndLoadModel(modelId: string, systemInfo?: SystemInformation) {
|
||||
// Gather system information for CPU physical cores and memory
|
||||
return killSubprocess()
|
||||
.then(() =>
|
||||
@ -160,10 +161,10 @@ async function runNitroAndLoadModel(systemInfo?: SystemInformation) {
|
||||
)
|
||||
.then(() => spawnNitroProcess(systemInfo))
|
||||
.then(() => loadLLMModel(currentSettings))
|
||||
.then(validateModelStatus)
|
||||
.then(() => validateModelStatus(modelId))
|
||||
.catch((err) => {
|
||||
// TODO: Broadcast error so app could display proper error message
|
||||
log(`[NITRO]::Error: ${err}`)
|
||||
log(`[CORTEX]::Error: ${err}`)
|
||||
return { error: err }
|
||||
})
|
||||
}
|
||||
@ -222,7 +223,7 @@ function loadLLMModel(settings: any): Promise<Response> {
|
||||
if (!settings?.ngl) {
|
||||
settings.ngl = 100
|
||||
}
|
||||
log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`)
|
||||
log(`[CORTEX]::Debug: Loading model with params ${JSON.stringify(settings)}`)
|
||||
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
@ -234,14 +235,14 @@ function loadLLMModel(settings: any): Promise<Response> {
|
||||
})
|
||||
.then((res) => {
|
||||
log(
|
||||
`[NITRO]::Debug: Load model success with response ${JSON.stringify(
|
||||
`[CORTEX]::Debug: Load model success with response ${JSON.stringify(
|
||||
res
|
||||
)}`
|
||||
)
|
||||
return Promise.resolve(res)
|
||||
})
|
||||
.catch((err) => {
|
||||
log(`[NITRO]::Error: Load model failed with error ${err}`)
|
||||
log(`[CORTEX]::Error: Load model failed with error ${err}`)
|
||||
return Promise.reject(err)
|
||||
})
|
||||
}
|
||||
@ -252,11 +253,12 @@ function loadLLMModel(settings: any): Promise<Response> {
|
||||
* If the model is loaded successfully, the object is empty.
|
||||
* If the model is not loaded successfully, the object contains an error message.
|
||||
*/
|
||||
async function validateModelStatus(): Promise<void> {
|
||||
async function validateModelStatus(modelId: string): Promise<void> {
|
||||
// Send a GET request to the validation URL.
|
||||
// Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries.
|
||||
return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
|
||||
method: 'GET',
|
||||
method: 'POST',
|
||||
body: JSON.stringify({ model: modelId }),
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
@ -264,7 +266,7 @@ async function validateModelStatus(): Promise<void> {
|
||||
retryDelay: 300,
|
||||
}).then(async (res: Response) => {
|
||||
log(
|
||||
`[NITRO]::Debug: Validate model state with response ${JSON.stringify(
|
||||
`[CORTEX]::Debug: Validate model state with response ${JSON.stringify(
|
||||
res.status
|
||||
)}`
|
||||
)
|
||||
@ -275,7 +277,7 @@ async function validateModelStatus(): Promise<void> {
|
||||
// Otherwise, return an object with an error message.
|
||||
if (body.model_loaded) {
|
||||
log(
|
||||
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
|
||||
`[CORTEX]::Debug: Validate model state success with response ${JSON.stringify(
|
||||
body
|
||||
)}`
|
||||
)
|
||||
@ -283,7 +285,7 @@ async function validateModelStatus(): Promise<void> {
|
||||
}
|
||||
}
|
||||
log(
|
||||
`[NITRO]::Debug: Validate model state failed with response ${JSON.stringify(
|
||||
`[CORTEX]::Debug: Validate model state failed with response ${JSON.stringify(
|
||||
res.statusText
|
||||
)}`
|
||||
)
|
||||
@ -298,7 +300,7 @@ async function validateModelStatus(): Promise<void> {
|
||||
async function killSubprocess(): Promise<void> {
|
||||
const controller = new AbortController()
|
||||
setTimeout(() => controller.abort(), 5000)
|
||||
log(`[NITRO]::Debug: Request to kill Nitro`)
|
||||
log(`[CORTEX]::Debug: Request to kill cortex`)
|
||||
|
||||
const killRequest = () => {
|
||||
return fetch(NITRO_HTTP_KILL_URL, {
|
||||
@ -309,17 +311,17 @@ async function killSubprocess(): Promise<void> {
|
||||
.then(() =>
|
||||
tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
|
||||
)
|
||||
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
|
||||
.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
|
||||
.catch((err) => {
|
||||
log(
|
||||
`[NITRO]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
|
||||
`[CORTEX]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
|
||||
)
|
||||
throw 'PORT_NOT_AVAILABLE'
|
||||
})
|
||||
}
|
||||
|
||||
if (subprocess?.pid) {
|
||||
log(`[NITRO]::Debug: Killing PID ${subprocess.pid}`)
|
||||
log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`)
|
||||
const pid = subprocess.pid
|
||||
return new Promise((resolve, reject) => {
|
||||
terminate(pid, function (err) {
|
||||
@ -329,7 +331,7 @@ async function killSubprocess(): Promise<void> {
|
||||
tcpPortUsed
|
||||
.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
|
||||
.then(() => resolve())
|
||||
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
|
||||
.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
|
||||
.catch(() => {
|
||||
killRequest().then(resolve).catch(reject)
|
||||
})
|
||||
@ -346,22 +348,24 @@ async function killSubprocess(): Promise<void> {
|
||||
* @returns A promise that resolves when the Nitro subprocess is started.
|
||||
*/
|
||||
function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
|
||||
log(`[NITRO]::Debug: Spawning Nitro subprocess...`)
|
||||
log(`[CORTEX]::Debug: Spawning cortex subprocess...`)
|
||||
|
||||
return new Promise<void>(async (resolve, reject) => {
|
||||
let binaryFolder = path.join(__dirname, '..', 'bin') // Current directory by default
|
||||
let executableOptions = executableNitroFile(systemInfo?.gpuSetting)
|
||||
|
||||
const args: string[] = ['1', LOCAL_HOST, PORT.toString()]
|
||||
// Execute the binary
|
||||
log(
|
||||
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
|
||||
`[CORTEX]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
|
||||
)
|
||||
log(
|
||||
path.parse(executableOptions.executablePath).dir
|
||||
)
|
||||
subprocess = spawn(
|
||||
executableOptions.executablePath,
|
||||
['1', LOCAL_HOST, PORT.toString()],
|
||||
{
|
||||
cwd: binaryFolder,
|
||||
cwd: path.join(path.parse(executableOptions.executablePath).dir),
|
||||
env: {
|
||||
...process.env,
|
||||
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
|
||||
@ -375,15 +379,15 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
|
||||
|
||||
// Handle subprocess output
|
||||
subprocess.stdout.on('data', (data: any) => {
|
||||
log(`[NITRO]::Debug: ${data}`)
|
||||
log(`[CORTEX]::Debug: ${data}`)
|
||||
})
|
||||
|
||||
subprocess.stderr.on('data', (data: any) => {
|
||||
log(`[NITRO]::Error: ${data}`)
|
||||
log(`[CORTEX]::Error: ${data}`)
|
||||
})
|
||||
|
||||
subprocess.on('close', (code: any) => {
|
||||
log(`[NITRO]::Debug: Nitro exited with code: ${code}`)
|
||||
log(`[CORTEX]::Debug: cortex exited with code: ${code}`)
|
||||
subprocess = undefined
|
||||
reject(`child process exited with code ${code}`)
|
||||
})
|
||||
@ -391,7 +395,7 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
|
||||
tcpPortUsed
|
||||
.waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000)
|
||||
.then(() => {
|
||||
log(`[NITRO]::Debug: Nitro is ready`)
|
||||
log(`[CORTEX]::Debug: cortex is ready`)
|
||||
resolve()
|
||||
})
|
||||
})
|
||||
|
||||
@ -1,7 +1,7 @@
{
  "name": "@janhq/inference-openai-extension",
  "productName": "OpenAI Inference Engine",
  "version": "1.0.0",
  "version": "1.0.2",
  "description": "This extension enables OpenAI chat completion API calls",
  "main": "dist/index.js",
  "module": "dist/module.js",

@ -5,20 +5,27 @@
"url": "https://openai.com"
}
],
"id": "gpt-4",
"id": "gpt-4-turbo",
"object": "model",
"name": "OpenAI GPT 4",
"version": "1.0",
"description": "OpenAI GPT 4 model is extremely good",
"name": "OpenAI GPT 4 Turbo",
"version": "1.2",
"description": "OpenAI GPT 4 Turbo model is extremely good",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "OpenAI",
"tags": ["General", "Big Context Length"]
"tags": [
"General"
]
},
"engine": "openai"
},
@ -31,8 +38,8 @@
"id": "gpt-4-vision-preview",
"object": "model",
"name": "OpenAI GPT 4 with Vision (Preview)",
"version": "1.0",
"description": "OpenAI GPT 4 with Vision model is extremely good in preview",
"version": "1.1",
"description": "OpenAI GPT-4 Vision model features vision understanding capabilities",
"format": "api",
"settings": {
"vision_model": true,
@ -40,34 +47,16 @@
},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7
"temperature": 0.7,
"top_p": 0.95,
"stream": true
},
"metadata": {
"author": "OpenAI",
"tags": ["General", "Big Context Length", "Vision"]
},
"engine": "openai"
},
{
"sources": [
{
"url": "https://openai.com"
}
],
"id": "gpt-3.5-turbo-16k-0613",
"object": "model",
"name": "OpenAI GPT 3.5 Turbo 16k 0613",
"version": "1.0",
"description": "OpenAI GPT 3.5 Turbo 16k 0613 model is extremely good",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7
},
"metadata": {
"author": "OpenAI",
"tags": ["General", "Big Context Length"]
"tags": [
"General",
"Vision"
]
},
"engine": "openai"
},
@ -80,17 +69,54 @@
"id": "gpt-3.5-turbo",
"object": "model",
"name": "OpenAI GPT 3.5 Turbo",
"version": "1.0",
"description": "OpenAI GPT 3.5 Turbo model is extremely good",
"version": "1.1",
"description": "OpenAI GPT 3.5 Turbo model is extremely fast",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "OpenAI",
"tags": ["General", "Big Context Length"]
"tags": [
"General"
]
},
"engine": "openai"
},
{
"sources": [
{
"url": "https://openai.com"
}
],
"id": "gpt-4o",
"object": "model",
"name": "OpenAI GPT 4o",
"version": "1.1",
"description": "OpenAI GPT 4o is a new flagship model with fast speed and high quality",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "OpenAI",
"tags": [
"General"
]
},
"engine": "openai"
}
79
extensions/inference-openrouter-extension/README.md
Normal file
@ -0,0 +1,79 @@
# Open Router Engine Extension

Created using Jan extension example

# Create a Jan Extension using Typescript

Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀

## Create Your Own Extension

To create your own extension, you can use this repository as a template! Just follow the below instructions:

1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository

## Initial Setup

After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.

> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!

1. :hammer_and_wrench: Install the dependencies

```bash
npm install
```

1. :building_construction: Package the TypeScript for distribution

```bash
npm run bundle
```

1. :white_check_mark: Check your artifact

There will be a tgz file in your extension directory now

## Update the Extension Metadata

The [`package.json`](package.json) file defines metadata about your extension, such as
extension name, main entry, description and version.

When you copy this repository, update `package.json` with the name, description for your extension.
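
As a concrete, hedged illustration of where that metadata ends up: in the OpenRouter extension added by this commit, fields such as `name`, `version`, and `engine` from `package.json` are read at build time by `webpack.config.js` (shown later in this diff) and injected into the bundle. The surrounding code below is illustrative only; the field values match the package.json added in this commit:

```typescript
// Sketch only: reading the same package.json fields the build consumes.
const packageJson = require('./package.json')

console.log(packageJson.name)    // "@janhq/inference-openrouter-extension"
console.log(packageJson.version) // "1.0.0"
console.log(packageJson.engine)  // "openrouter" – exposed to the bundle as ENGINE via webpack.DefinePlugin
```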

## Update the Extension Code

The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.

There are a few things to keep in mind when writing your extension code:

- Most Jan Extension functions are processed asynchronously.
In `index.ts`, you will see that the extension function will return a `Promise<any>`.

```typescript
import { events, MessageEvent, MessageRequest } from '@janhq/core'

function onStart(): Promise<any> {
return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.inference(data)
)
}
```

For more information about the Jan Extension Core module, see the
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).
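
A slightly fuller, still hedged sketch of the same asynchronous pattern, modelled on the OpenRouter `src/index.ts` added in this commit; the class name and setting key below are illustrative rather than real identifiers:

```typescript
import { RemoteOAIEngine } from '@janhq/core'

// Injected at build time by webpack.DefinePlugin (see webpack.config.js).
declare const SETTINGS: Array<any>
declare const MODELS: Array<any>

export default class ExampleRemoteEngine extends RemoteOAIEngine {
  inferenceUrl: string = ''
  provider: string = 'example' // illustrative provider id

  override async onLoad(): Promise<void> {
    super.onLoad()
    // Registration and setting reads are asynchronous, hence the Promise return type.
    this.registerSettings(SETTINGS)
    this.registerModels(MODELS)
    this.apiKey = await this.getSetting<string>('example-api-key', '')
  }
}
```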

So, what are you waiting for? Go ahead and start customizing your extension!
43
extensions/inference-openrouter-extension/package.json
Normal file
@ -0,0 +1,43 @@
{
"name": "@janhq/inference-openrouter-extension",
"productName": "OpenRouter Inference Engine",
"version": "1.0.0",
"description": "This extension enables Open Router chat completion API calls",
"main": "dist/index.js",
"module": "dist/module.js",
"engine": "openrouter",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"scripts": {
"build": "tsc -b . && webpack --config webpack.config.js",
"build:publish": "rimraf *.tgz --glob && yarn build && npm pack && cpx *.tgz ../../pre-install",
"sync:core": "cd ../.. && yarn build:core && cd extensions && rm yarn.lock && cd inference-openrouter-extension && yarn && yarn build:publish"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/module.js"
},
"devDependencies": {
"cpx": "^1.5.0",
"rimraf": "^3.0.2",
"webpack": "^5.88.2",
"webpack-cli": "^5.1.4",
"ts-loader": "^9.5.0"
},
"dependencies": {
"@janhq/core": "file:../../core",
"fetch-retry": "^5.0.6",
"ulidx": "^2.3.0"
},
"engines": {
"node": ">=18.0.0"
},
"files": [
"dist/*",
"package.json",
"README.md"
],
"bundleDependencies": [
"fetch-retry"
]
}
@ -0,0 +1,28 @@
[
{
"sources": [
{
"url": "https://openrouter.ai"
}
],
"id": "open-router-auto",
"object": "model",
"name": "OpenRouter",
"version": "1.0",
"description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 1024,
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "OpenRouter",
"tags": ["General", "Big Context Length"]
},
"engine": "openrouter"
}
]
@ -0,0 +1,23 @@
[
{
"key": "chat-completions-endpoint",
"title": "Chat Completions Endpoint",
"description": "The endpoint to use for chat completions. See the [OpenRouter API documentation](https://openrouter.ai/docs) for more information.",
"controllerType": "input",
"controllerProps": {
"placeholder": "https://openrouter.ai/api/v1/chat/completions",
"value": "https://openrouter.ai/api/v1/chat/completions"
}
},
{
"key": "openrouter-api-key",
"title": "API Key",
"description": "The OpenRouter API uses API keys for authentication. Visit your [API Keys](https://openrouter.ai/keys) page to retrieve the API key you'll use in your requests.",
"controllerType": "input",
"controllerProps": {
"placeholder": "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
"value": "",
"type": "password"
}
}
]
76
extensions/inference-openrouter-extension/src/index.ts
Normal file
@ -0,0 +1,76 @@
/**
* @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
* @version 1.0.0
* @module inference-openai-extension/src/index
*/

import { RemoteOAIEngine } from '@janhq/core'
import { PayloadType } from '@janhq/core'
import { ChatCompletionRole } from '@janhq/core'

declare const SETTINGS: Array<any>
declare const MODELS: Array<any>

enum Settings {
apiKey = 'openrouter-api-key',
chatCompletionsEndPoint = 'chat-completions-endpoint',
}

enum RoleType {
user = 'USER',
chatbot = 'CHATBOT',
system = 'SYSTEM',
}

/**
* A class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
*/
export default class JanInferenceOpenRouterExtension extends RemoteOAIEngine {
inferenceUrl: string = ''
provider: string = 'openrouter'

override async onLoad(): Promise<void> {
super.onLoad()

// Register Settings
this.registerSettings(SETTINGS)
this.registerModels(MODELS)

this.apiKey = await this.getSetting<string>(Settings.apiKey, '')
this.inferenceUrl = await this.getSetting<string>(
Settings.chatCompletionsEndPoint,
''
)
if (this.inferenceUrl.length === 0) {
SETTINGS.forEach((setting) => {
if (setting.key === Settings.chatCompletionsEndPoint) {
this.inferenceUrl = setting.controllerProps.value as string
}
})
}
}

onSettingUpdate<T>(key: string, value: T): void {
if (key === Settings.apiKey) {
this.apiKey = value as string
} else if (key === Settings.chatCompletionsEndPoint) {
if (typeof value !== 'string') return

if (value.trim().length === 0) {
SETTINGS.forEach((setting) => {
if (setting.key === Settings.chatCompletionsEndPoint) {
this.inferenceUrl = setting.controllerProps.value as string
}
})
} else {
this.inferenceUrl = value
}
}
}

transformPayload = (payload: PayloadType)=>({...payload,model:"openrouter/auto"})
}
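
The `transformPayload` override above is what pins every outgoing request to OpenRouter's automatic router. A hedged illustration of its effect on a request body follows; the payload fields are made up for the example:

```typescript
// Illustration only: whatever model id the thread carries, the request that
// actually leaves the extension asks OpenRouter to pick the model.
const payload = { model: 'gpt-3.5-turbo', stream: true, messages: [{ role: 'user', content: 'Hi' }] }
const transformed = { ...payload, model: 'openrouter/auto' }
// transformed.model === 'openrouter/auto'
```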
14
extensions/inference-openrouter-extension/tsconfig.json
Normal file
@ -0,0 +1,14 @@
{
"compilerOptions": {
"target": "es2016",
"module": "ES6",
"moduleResolution": "node",
"outDir": "./dist",
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"strict": false,
"skipLibCheck": true,
"rootDir": "./src"
},
"include": ["./src"]
}
37
extensions/inference-openrouter-extension/webpack.config.js
Normal file
@ -0,0 +1,37 @@
const webpack = require('webpack')
const packageJson = require('./package.json')
const settingJson = require('./resources/settings.json')
const modelsJson = require('./resources/models.json')

module.exports = {
experiments: { outputModule: true },
entry: './src/index.ts', // Adjust the entry point to match your project's main file
mode: 'production',
module: {
rules: [
{
test: /\.tsx?$/,
use: 'ts-loader',
exclude: /node_modules/,
},
],
},
plugins: [
new webpack.DefinePlugin({
MODELS: JSON.stringify(modelsJson),
SETTINGS: JSON.stringify(settingJson),
ENGINE: JSON.stringify(packageJson.engine),
}),
],
output: {
filename: 'index.js', // Adjust the output file name as needed
library: { type: 'module' }, // Specify ESM output format
},
resolve: {
extensions: ['.ts', '.js'],
},
optimization: {
minimize: false,
},
// Add loaders and other configuration as needed for your project
}
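
The `DefinePlugin` entries are the build-time counterpart of the bare `declare const` statements in `src/index.ts`: webpack substitutes the JSON contents of `resources/models.json` and `resources/settings.json` (and the `engine` field of `package.json`) wherever those identifiers appear in the bundle. A hedged sketch of the pairing, with the substituted values abbreviated:

```typescript
// In src/index.ts the values are only declared; nothing is imported at runtime.
declare const SETTINGS: Array<any>
declare const MODELS: Array<any>

// After bundling, references to them behave as if the literals had been inlined:
//   this.registerSettings([{ "key": "chat-completions-endpoint", ... }, { "key": "openrouter-api-key", ... }])
//   this.registerModels([{ "id": "open-router-auto", ... }])
// ENGINE (unused in this index.ts) would resolve the same way, to "openrouter".
```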

@ -97,7 +97,7 @@ function unloadModel(): Promise<void> {
}

if (subprocess?.pid) {
log(`[NITRO]::Debug: Killing PID ${subprocess.pid}`)
log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`)
const pid = subprocess.pid
return new Promise((resolve, reject) => {
terminate(pid, function (err) {
@ -107,7 +107,7 @@ function unloadModel(): Promise<void> {
return tcpPortUsed
.waitUntilFree(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 5000)
.then(() => resolve())
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
.catch(() => {
killRequest()
})

@ -45,6 +45,7 @@
--border: 20 5.9% 90%;
--input: 20 5.9% 90%;
--ring: 20 14.3% 4.1%;
--scroll-bar: 60, 3%, 86%;

.primary-blue {
--primary: 221 83% 53%;

@ -21,3 +21,38 @@
@apply bg-border relative z-50 w-[10px] rounded-full;
}
}

// Customized scroll bar
::-webkit-scrollbar {
width: 7px;
}

::-webkit-scrollbar-thumb {
background-color: hsl(var(--scroll-bar));
border-radius: 4px;
}

::-webkit-scrollbar-track {
background-color: hsl(var(--background));
}

::-webkit-scrollbar-corner {
background-color: hsl(var(--background));
}

::-moz-scrollbar {
width: 7px;
}

::-moz-scrollbar-thumb {
background-color: hsl(var(--scroll-bar));
border-radius: 4px;
}

::-moz-scrollbar-track {
background-color: hsl(var(--background));
}

::-moz-scrollbar-corner {
background-color: hsl(var(--background));
}

@ -25,6 +25,8 @@ import ImportModelOptionModal from '@/screens/Settings/ImportModelOptionModal'
import ImportingModelModal from '@/screens/Settings/ImportingModelModal'
import SelectingModelModal from '@/screens/Settings/SelectingModelModal'

import LoadingModal from '../LoadingModal'

import MainViewContainer from '../MainViewContainer'

import InstallingExtensionModal from './BottomBar/InstallingExtension/InstallingExtensionModal'
@ -69,6 +71,7 @@ const BaseLayout = () => {
<BottomBar />
</div>
</div>
<LoadingModal />
{importModelStage === 'SELECTING_MODEL' && <SelectingModelModal />}
{importModelStage === 'MODEL_SELECTED' && <ImportModelOptionModal />}
{importModelStage === 'IMPORTING_MODEL' && <ImportingModelModal />}

@ -1,4 +1,4 @@
import { ReactNode, useEffect, useRef } from 'react'
import { ReactNode, useCallback, useEffect, useRef } from 'react'

type Props = {
children: ReactNode
@ -6,20 +6,44 @@ type Props = {

const ListContainer: React.FC<Props> = ({ children }) => {
const listRef = useRef<HTMLDivElement>(null)
const prevScrollTop = useRef(0)
const isUserManuallyScrollingUp = useRef(false)

const handleScroll = useCallback((event: React.UIEvent<HTMLElement>) => {
const currentScrollTop = event.currentTarget.scrollTop

if (prevScrollTop.current > currentScrollTop) {
console.debug('User is manually scrolling up')
isUserManuallyScrollingUp.current = true
} else {
const currentScrollTop = event.currentTarget.scrollTop
const scrollHeight = event.currentTarget.scrollHeight
const clientHeight = event.currentTarget.clientHeight

if (currentScrollTop + clientHeight >= scrollHeight) {
console.debug('Scrolled to the bottom')
isUserManuallyScrollingUp.current = false
}
}

prevScrollTop.current = currentScrollTop
}, [])

useEffect(() => {
const scrollHeight = listRef.current?.scrollHeight ?? 0
if (isUserManuallyScrollingUp.current === true) return

const scrollHeight = listRef.current?.scrollHeight ?? 0
listRef.current?.scrollTo({
top: scrollHeight,
behavior: 'smooth',
behavior: 'instant',
})
})
}, [listRef.current?.scrollHeight, isUserManuallyScrollingUp])

return (
<div
ref={listRef}
className="flex h-full w-full flex-col overflow-y-scroll"
onScroll={handleScroll}
>
{children}
</div>
26
web/containers/LoadingModal/index.tsx
Normal file
@ -0,0 +1,26 @@
import { Modal, ModalContent, ModalHeader, ModalTitle } from '@janhq/uikit'
import { atom, useAtomValue } from 'jotai'

export type LoadingInfo = {
title: string
message: string
}

export const loadingModalInfoAtom = atom<LoadingInfo | undefined>(undefined)

const ResettingModal: React.FC = () => {
const loadingInfo = useAtomValue(loadingModalInfoAtom)

return (
<Modal open={loadingInfo != null}>
<ModalContent>
<ModalHeader>
<ModalTitle>{loadingInfo?.title}</ModalTitle>
</ModalHeader>
<p className="text-muted-foreground">{loadingInfo?.message}</p>
</ModalContent>
</Modal>
)
}

export default ResettingModal
101
web/containers/Providers/DeepLinkListener.tsx
Normal file
@ -0,0 +1,101 @@
import { Fragment, ReactNode } from 'react'

import { useSetAtom } from 'jotai'

import { useDebouncedCallback } from 'use-debounce'

import { useGetHFRepoData } from '@/hooks/useGetHFRepoData'

import { loadingModalInfoAtom } from '../LoadingModal'
import { toaster } from '../Toast'

import {
importHuggingFaceModelStageAtom,
importingHuggingFaceRepoDataAtom,
} from '@/helpers/atoms/HuggingFace.atom'
type Props = {
children: ReactNode
}

const DeepLinkListener: React.FC<Props> = ({ children }) => {
const { getHfRepoData } = useGetHFRepoData()
const setLoadingInfo = useSetAtom(loadingModalInfoAtom)
const setImportingHuggingFaceRepoData = useSetAtom(
importingHuggingFaceRepoDataAtom
)
const setImportHuggingFaceModelStage = useSetAtom(
importHuggingFaceModelStageAtom
)

const handleDeepLinkAction = useDebouncedCallback(
async (deepLinkAction: DeepLinkAction) => {
if (
deepLinkAction.action !== 'models' ||
deepLinkAction.provider !== 'huggingface'
) {
console.error(
`Invalid deeplink action (${deepLinkAction.action}) or provider (${deepLinkAction.provider})`
)
return
}

try {
setLoadingInfo({
title: 'Getting Hugging Face models',
message: 'Please wait..',
})
const data = await getHfRepoData(deepLinkAction.resource)
setImportingHuggingFaceRepoData(data)
setImportHuggingFaceModelStage('REPO_DETAIL')
setLoadingInfo(undefined)
} catch (err) {
setLoadingInfo(undefined)
toaster({
title: 'Failed to get Hugging Face models',
description: err instanceof Error ? err.message : 'Unexpected Error',
type: 'error',
})
console.error(err)
}
},
300
)

window.electronAPI?.onDeepLink((_event: string, input: string) => {
window.core?.api?.ackDeepLink()

const action = deeplinkParser(input)
if (!action) return
handleDeepLinkAction(action)
})

return <Fragment>{children}</Fragment>
}

type DeepLinkAction = {
action: string
provider: string
resource: string
}

const deeplinkParser = (
deepLink: string | undefined
): DeepLinkAction | undefined => {
if (!deepLink) return undefined

try {
const url = new URL(deepLink)
const params = url.pathname.split('/').filter((str) => str.length > 0)

if (params.length < 3) return undefined
const action = params[0]
const provider = params[1]
const resource = params.slice(2).join('/')
return { action, provider, resource }
} catch (err) {
console.error(err)
return undefined
}
}

export default DeepLinkListener
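
For context, `deeplinkParser` only looks at the URL's path segments: the first becomes `action`, the second `provider`, and the remainder are joined back into `resource`. A hedged example with a made-up deep link (the actual scheme and host Jan registers are not shown in this diff):

```typescript
// Hypothetical input – only the path segments matter to the parser.
const url = new URL('jan://open/models/huggingface/janhq/example-repo')
const params = url.pathname.split('/').filter((str) => str.length > 0)
// params   => ['models', 'huggingface', 'janhq', 'example-repo']
// action   => 'models'
// provider => 'huggingface'
// resource => 'janhq/example-repo'
```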

@ -22,6 +22,7 @@ import Loader from '../Loader'

import DataLoader from './DataLoader'

import DeepLinkListener from './DeepLinkListener'
import KeyListener from './KeyListener'

import { extensionManager } from '@/extension'
@ -78,7 +79,9 @@ const Providers = ({ children }: PropsWithChildren) => {
<KeyListener>
<EventListenerWrapper>
<TooltipProvider delayDuration={0}>
<DataLoader>{children}</DataLoader>
<DataLoader>
<DeepLinkListener>{children}</DeepLinkListener>
</DataLoader>
</TooltipProvider>
</EventListenerWrapper>
<Toaster />

@ -99,6 +99,11 @@ export const useCreateNewThread = () => {
? { ctx_len: 2048 }
: {}

const overriddenParameters =
defaultModel?.parameters.max_tokens && defaultModel.parameters.max_tokens
? { max_tokens: 2048 }
: {}

const createdAt = Date.now()
const assistantInfo: ThreadAssistantInfo = {
assistant_id: assistant.id,
@ -107,7 +112,8 @@ export const useCreateNewThread = () => {
model: {
id: defaultModel?.id ?? '*',
settings: { ...defaultModel?.settings, ...overriddenSettings } ?? {},
parameters: defaultModel?.parameters ?? {},
parameters:
{ ...defaultModel?.parameters, ...overriddenParameters } ?? {},
engine: defaultModel?.engine,
},
instructions: assistant.instructions,

@ -22,8 +22,8 @@ const ChatBody: React.FC = () => {
const downloadedModels = useAtomValue(downloadedModelsAtom)
const loadModelError = useAtomValue(loadModelErrorAtom)

if (downloadedModels.length === 0) return <EmptyModel />
if (messages.length === 0) return <EmptyThread />
if (!downloadedModels.length) return <EmptyModel />
if (!messages.length) return <EmptyThread />

return (
<ListContainer>

@ -129,12 +129,10 @@ const EditChatInput: React.FC<Props> = ({ message }) => {
}

return (
<div className="mx-auto flex w-full flex-shrink-0 items-end justify-center space-x-4 pb-0 pt-1">
<div className="mx-auto flex w-full flex-shrink-0 flex-col items-start justify-center space-y-4 pb-0 pt-1">
<div className="relative flex w-full flex-col">
<Textarea
className={twMerge(
'max-h-[400px] resize-none overflow-y-hidden pr-20'
)}
className={twMerge('max-h-[400px] resize-none pr-20')}
style={{ height: '40px' }}
ref={textareaRef}
onKeyDown={onKeyDown}

@ -3,12 +3,17 @@ import {
InputComponentProps,
CheckboxComponentProps,
SliderComponentProps,
InferenceEngine,
} from '@janhq/core'

import { useAtomValue } from 'jotai/react'

import Checkbox from '@/containers/Checkbox'
import ModelConfigInput from '@/containers/ModelConfigInput'
import SliderRightPanel from '@/containers/SliderRightPanel'

import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'

type Props = {
componentProps: SettingComponentProps[]
disabled?: boolean
@ -20,6 +25,7 @@ const SettingComponent: React.FC<Props> = ({
disabled = false,
onValueUpdated,
}) => {
const activeThread = useAtomValue(activeThreadAtom)
const components = componentProps.map((data) => {
switch (data.controllerType) {
case 'slider': {
@ -31,7 +37,16 @@ const SettingComponent: React.FC<Props> = ({
title={data.title}
description={data.description}
min={min}
max={max}
max={
data.key === 'max_tokens' &&
activeThread &&
activeThread.assistants[0].model.engine === InferenceEngine.nitro
? Number(
activeThread &&
activeThread.assistants[0].model.settings.ctx_len
)
: max
}
step={step}
value={value}
name={data.key}

@ -33,7 +33,7 @@ export const presetConfiguration: Record<string, SettingComponentProps> = {
'The context length for model operations varies; the maximum depends on the specific model used.',
controllerType: 'slider',
controllerProps: {
min: 0,
min: 128,
max: 4096,
step: 128,
value: 2048,

@ -118,6 +118,32 @@ const Sidebar: React.FC = () => {
updateModelParameter(activeThread, {
params: { [key]: value },
})

if (
activeThread.assistants[0].model.parameters.max_tokens &&
activeThread.assistants[0].model.settings.ctx_len
) {
if (
key === 'max_tokens' &&
Number(value) > activeThread.assistants[0].model.settings.ctx_len
) {
updateModelParameter(activeThread, {
params: {
max_tokens: activeThread.assistants[0].model.settings.ctx_len,
},
})
}
if (
key === 'ctx_len' &&
Number(value) < activeThread.assistants[0].model.parameters.max_tokens
) {
updateModelParameter(activeThread, {
params: {
max_tokens: activeThread.assistants[0].model.settings.ctx_len,
},
})
}
}
},
[activeThread, setEngineParamsUpdate, stopModel, updateModelParameter]
)