Merge pull request #2916 from janhq/dev

Release/0.4.13 to main
Van Pham 2024-05-16 21:35:15 +07:00 committed by GitHub
commit f2947c14f5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
98 changed files with 1848 additions and 556 deletions

View File

@@ -57,19 +57,19 @@ jobs:
 rm -rf ~/jan
 make clean
-# - name: Get Commit Message for PR
-# if : github.event_name == 'pull_request'
-# run: |
-# echo "REPORT_PORTAL_DESCRIPTION=${{github.event.after}})" >> $GITHUB_ENV
-# - name: Get Commit Message for push event
-# if : github.event_name == 'push'
-# run: |
-# echo "REPORT_PORTAL_DESCRIPTION=${{github.sha}})" >> $GITHUB_ENV
-# - name: "Config report portal"
-# run: |
-# make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App macos" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
+- name: Get Commit Message for PR
+if : github.event_name == 'pull_request'
+run: |
+echo "REPORT_PORTAL_DESCRIPTION=${{github.event.after}})" >> $GITHUB_ENV
+- name: Get Commit Message for push event
+if : github.event_name == 'push'
+run: |
+echo "REPORT_PORTAL_DESCRIPTION=${{github.sha}})" >> $GITHUB_ENV
+- name: "Config report portal"
+run: |
+make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App macos" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
 - name: Linter and test
 run: |
@@ -78,9 +78,9 @@ jobs:
 make test
 env:
 CSC_IDENTITY_AUTO_DISCOVERY: "false"
-# TURBO_API: "${{ secrets.TURBO_API }}"
-# TURBO_TEAM: "macos"
-# TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
+TURBO_API: "${{ secrets.TURBO_API }}"
+TURBO_TEAM: "macos"
+TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
 test-on-macos-pr-target:
 if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository
@@ -141,16 +141,16 @@ jobs:
 }
 make clean
-# - name: Get Commit Message for push event
-# if : github.event_name == 'push'
-# shell: bash
-# run: |
-# echo "REPORT_PORTAL_DESCRIPTION=${{github.sha}}" >> $GITHUB_ENV
-# - name: "Config report portal"
-# shell: bash
-# run: |
-# make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App Windows ${{ matrix.antivirus-tools }}" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
+- name: Get Commit Message for push event
+if : github.event_name == 'push'
+shell: bash
+run: |
+echo "REPORT_PORTAL_DESCRIPTION=${{github.sha}}" >> $GITHUB_ENV
+- name: "Config report portal"
+shell: bash
+run: |
+make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App Windows ${{ matrix.antivirus-tools }}" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
 - name: Linter and test
 shell: powershell
@@ -158,10 +158,10 @@ jobs:
 npm config set registry ${{ secrets.NPM_PROXY }} --global
 yarn config set registry ${{ secrets.NPM_PROXY }} --global
 make test
-# env:
-# TURBO_API: "${{ secrets.TURBO_API }}"
-# TURBO_TEAM: "windows"
-# TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
+env:
+TURBO_API: "${{ secrets.TURBO_API }}"
+TURBO_TEAM: "windows"
+TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
 test-on-windows-pr:
 if: (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository)
 runs-on: windows-desktop-default-windows-security
@@ -189,16 +189,16 @@ jobs:
 }
 make clean
-# - name: Get Commit Message for PR
-# if : github.event_name == 'pull_request'
-# shell: bash
-# run: |
-# echo "REPORT_PORTAL_DESCRIPTION=${{github.event.after}}" >> $GITHUB_ENV
-# - name: "Config report portal"
-# shell: bash
-# run: |
-# make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App Windows" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
+- name: Get Commit Message for PR
+if : github.event_name == 'pull_request'
+shell: bash
+run: |
+echo "REPORT_PORTAL_DESCRIPTION=${{github.event.after}}" >> $GITHUB_ENV
+- name: "Config report portal"
+shell: bash
+run: |
+make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App Windows" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
 - name: Linter and test
 shell: powershell
@@ -206,10 +206,10 @@ jobs:
 npm config set registry ${{ secrets.NPM_PROXY }} --global
 yarn config set registry ${{ secrets.NPM_PROXY }} --global
 make test
-# env:
-# TURBO_API: "${{ secrets.TURBO_API }}"
-# TURBO_TEAM: "windows"
-# TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
+env:
+TURBO_API: "${{ secrets.TURBO_API }}"
+TURBO_TEAM: "windows"
+TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
 test-on-windows-pr-target:
 if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository
@@ -266,20 +266,20 @@ jobs:
 rm -rf ~/jan
 make clean
-# - name: Get Commit Message for PR
-# if : github.event_name == 'pull_request'
-# run: |
-# echo "REPORT_PORTAL_DESCRIPTION=${{github.event.after}}" >> $GITHUB_ENV
-# - name: Get Commit Message for push event
-# if : github.event_name == 'push'
-# run: |
-# echo "REPORT_PORTAL_DESCRIPTION=${{github.sha}}" >> $GITHUB_ENV
-# - name: "Config report portal"
-# shell: bash
-# run: |
-# make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App Linux" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
+- name: Get Commit Message for PR
+if : github.event_name == 'pull_request'
+run: |
+echo "REPORT_PORTAL_DESCRIPTION=${{github.event.after}}" >> $GITHUB_ENV
+- name: Get Commit Message for push event
+if : github.event_name == 'push'
+run: |
+echo "REPORT_PORTAL_DESCRIPTION=${{github.sha}}" >> $GITHUB_ENV
+- name: "Config report portal"
+shell: bash
+run: |
+make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App Linux" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
 - name: Linter and test
 run: |
@@ -288,10 +288,10 @@ jobs:
 npm config set registry ${{ secrets.NPM_PROXY }} --global
 yarn config set registry ${{ secrets.NPM_PROXY }} --global
 make test
-# env:
-# TURBO_API: "${{ secrets.TURBO_API }}"
-# TURBO_TEAM: "linux"
-# TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
+env:
+TURBO_API: "${{ secrets.TURBO_API }}"
+TURBO_TEAM: "linux"
+TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
 test-on-ubuntu-pr-target:
 runs-on: [self-hosted, Linux, ubuntu-desktop]

View File

@@ -41,7 +41,7 @@ on:
 jobs:
 build-macos:
-runs-on: macos-silicon
+runs-on: macos-latest
 environment: production
 permissions:
 contents: write
@@ -55,15 +55,9 @@ jobs:
 uses: actions/setup-node@v1
 with:
 node-version: 20
-- name: Unblock keychain
-run: |
-security unlock-keychain -p ${{ secrets.KEYCHAIN_PASSWORD }} ~/Library/Keychains/login.keychain-db
-# - uses: actions/setup-python@v5
-# with:
-# python-version: '3.11'
-# - name: Install jq
-# uses: dcarbone/install-jq-action@v2.0.1
+- name: Install jq
+uses: dcarbone/install-jq-action@v2.0.1
 - name: Update app version based on latest release tag with build number
 if: inputs.public_provider != 'github'
@@ -101,17 +95,17 @@ jobs:
 env:
 VERSION_TAG: ${{ inputs.new_version }}
-# - name: Get Cer for code signing
-# run: base64 -d <<< "$CODE_SIGN_P12_BASE64" > /tmp/codesign.p12
-# shell: bash
-# env:
-# CODE_SIGN_P12_BASE64: ${{ secrets.CODE_SIGN_P12_BASE64 }}
-# - uses: apple-actions/import-codesign-certs@v2
-# continue-on-error: true
-# with:
-# p12-file-base64: ${{ secrets.CODE_SIGN_P12_BASE64 }}
-# p12-password: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
+- name: Get Cer for code signing
+run: base64 -d <<< "$CODE_SIGN_P12_BASE64" > /tmp/codesign.p12
+shell: bash
+env:
+CODE_SIGN_P12_BASE64: ${{ secrets.CODE_SIGN_P12_BASE64 }}
+- uses: apple-actions/import-codesign-certs@v2
+continue-on-error: true
+with:
+p12-file-base64: ${{ secrets.CODE_SIGN_P12_BASE64 }}
+p12-password: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
 - name: Build and publish app to cloudflare r2 or github artifactory
 if: inputs.public_provider != 'github'
@@ -125,9 +119,9 @@ jobs:
 fi
 env:
 GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-# CSC_LINK: "/tmp/codesign.p12"
-# CSC_KEY_PASSWORD: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
-# CSC_IDENTITY_AUTO_DISCOVERY: "true"
+CSC_LINK: "/tmp/codesign.p12"
+CSC_KEY_PASSWORD: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
+CSC_IDENTITY_AUTO_DISCOVERY: "true"
 APPLE_ID: ${{ secrets.APPLE_ID }}
 APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
 APP_PATH: "."
@@ -143,9 +137,9 @@ jobs:
 make build-and-publish
 env:
 GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-# CSC_LINK: "/tmp/codesign.p12"
-# CSC_KEY_PASSWORD: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
-# CSC_IDENTITY_AUTO_DISCOVERY: "true"
+CSC_LINK: "/tmp/codesign.p12"
+CSC_KEY_PASSWORD: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
+CSC_IDENTITY_AUTO_DISCOVERY: "true"
 APPLE_ID: ${{ secrets.APPLE_ID }}
 APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
 APP_PATH: "."

View File

@@ -159,4 +159,3 @@ jobs:
 with:
 name: latest-mac-x64
 path: ./electron/dist/latest-mac.yml

View File

@@ -68,6 +68,10 @@ export function requestInference(
 let cachedLines = ''
 for (const line of lines) {
 try {
+if (transformResponse) {
+content += transformResponse(line)
+subscriber.next(content ?? '')
+} else {
 const toParse = cachedLines + line
 if (!line.includes('data: [DONE]')) {
 const data = JSON.parse(toParse.replace('data: ', ''))
@@ -77,6 +81,7 @@ export function requestInference(
 }
 if (content !== '') subscriber.next(content)
 }
+}
 } catch {
 cachedLines = line
 }
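For context, the added branch lets an engine hand `requestInference` a `transformResponse` callback that turns each raw streamed line into plain text before it is pushed to the subscriber. A minimal sketch of such a callback (illustrative only; the actual line format depends on the remote provider):

```typescript
// Sketch of a transformResponse callback in the shape the new branch expects:
// it receives one raw line from the response stream and returns text to append.
const transformResponse = (line: string): string => {
  try {
    // Assume the provider streams JSON fragments such as {"text": "..."} per line.
    const parsed = JSON.parse(line.replace('data: ', ''))
    return typeof parsed.text === 'string' ? parsed.text : ''
  } catch {
    return '' // ignore keep-alive or partial lines
  }
}
```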

View File

@@ -9,11 +9,11 @@ export const SUPPORTED_MODEL_FORMAT = '.gguf'
 // The URL for the Nitro subprocess
 const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
 // The URL for the Nitro subprocess to load a model
-export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`
+export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
 // The URL for the Nitro subprocess to validate a model
-export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`
+export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
 // The URL for the Nitro subprocess to kill itself
 export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
-export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/llamacpp/chat_completion` // default nitro url
+export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url
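The renamed routes remain OpenAI-style chat completion endpoints; a direct call against the new path might look like the sketch below (the model id is a placeholder, and the default local address 127.0.0.1:3928 is assumed from values used elsewhere in this changeset):

```typescript
// Sketch: calling the renamed chat completion route directly.
async function testChatCompletion(): Promise<void> {
  const res = await fetch(
    'http://127.0.0.1:3928/inferences/server/chat_completion',
    {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: 'my-local-model', // placeholder model id
        messages: [{ role: 'user', content: 'Hello' }],
        stream: false,
      }),
    }
  )
  console.log(await res.json())
}
```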

View File

@@ -144,12 +144,12 @@ const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSe
 }
 const spawnNitroProcess = async (): Promise<void> => {
-log(`[SERVER]::Debug: Spawning Nitro subprocess...`)
+log(`[SERVER]::Debug: Spawning cortex subprocess...`)
 let binaryFolder = join(
 getJanExtensionsPath(),
 '@janhq',
-'inference-nitro-extension',
+'inference-cortex-extension',
 'dist',
 'bin'
 )
@@ -160,7 +160,7 @@ const spawnNitroProcess = async (): Promise<void> => {
 const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()]
 // Execute the binary
 log(
-`[SERVER]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
+`[SERVER]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
 )
 subprocess = spawn(
 executableOptions.executablePath,
@@ -184,12 +184,12 @@
 })
 subprocess.on('close', (code: any) => {
-log(`[SERVER]::Debug: Nitro exited with code: ${code}`)
+log(`[SERVER]::Debug: cortex exited with code: ${code}`)
 subprocess = undefined
 })
 tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => {
-log(`[SERVER]::Debug: Nitro is ready`)
+log(`[SERVER]::Debug: cortex is ready`)
 })
 }
@@ -203,13 +203,13 @@ const executableNitroFile = (): NitroExecutableOptions => {
 let binaryFolder = join(
 getJanExtensionsPath(),
 '@janhq',
-'inference-nitro-extension',
+'inference-cortex-extension',
 'dist',
 'bin'
 )
 let cudaVisibleDevices = ''
-let binaryName = 'nitro'
+let binaryName = 'cortex-cpp'
 /**
 * The binary folder is different for each platform.
 */
@@ -228,12 +228,16 @@
 }
 cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
 }
-binaryName = 'nitro.exe'
+binaryName = 'cortex-cpp.exe'
 } else if (process.platform === 'darwin') {
 /**
 * For MacOS: mac-universal both Silicon and InteL
 */
-binaryFolder = join(binaryFolder, 'mac-universal')
+if(process.arch === 'arm64') {
+binaryFolder = join(binaryFolder, 'mac-arm64')
+} else {
+binaryFolder = join(binaryFolder, 'mac-amd64')
+}
 } else {
 /**
 * For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
@@ -300,7 +304,7 @@ const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> =>
 retryDelay: 500,
 })
 .then((res: any) => {
-log(`[SERVER]::Debug: Load model success with response ${JSON.stringify(res)}`)
+log(`[SERVER]::Debug: Load model request with response ${JSON.stringify(res)}`)
 return Promise.resolve(res)
 })
 .catch((err: any) => {
@@ -327,7 +331,7 @@ export const stopModel = async (_modelId: string) => {
 })
 }, 5000)
 const tcpPortUsed = require('tcp-port-used')
-log(`[SERVER]::Debug: Request to kill Nitro`)
+log(`[SERVER]::Debug: Request to kill cortex`)
 fetch(NITRO_HTTP_KILL_URL, {
 method: 'DELETE',

View File

@@ -4,7 +4,7 @@ import { log } from './logger'
 export const getSystemResourceInfo = async (): Promise<SystemResourceInfo> => {
 const cpu = await physicalCpuCount()
-log(`[NITRO]::CPU information - ${cpu}`)
+log(`[CORTEX]::CPU information - ${cpu}`)
 return {
 numCpuPhysicalCore: cpu,

View File

@@ -19,6 +19,7 @@ export enum NativeRoute {
 showMainWindow = 'showMainWindow',
 quickAskSizeUpdated = 'quickAskSizeUpdated',
+ackDeepLink = 'ackDeepLink',
 }
 /**
@@ -45,6 +46,8 @@ export enum AppEvent {
 onUserSubmitQuickAsk = 'onUserSubmitQuickAsk',
 onSelectedText = 'onSelectedText',
+onDeepLink = 'onDeepLink',
 }
 export enum DownloadRoute {

View File

@@ -151,4 +151,8 @@ export function handleAppIPCs() {
 async (_event, heightOffset: number): Promise<void> =>
 windowManager.expandQuickAskWindow(heightOffset)
 )
+ipcMain.handle(NativeRoute.ackDeepLink, async (_event): Promise<void> => {
+windowManager.ackDeepLink()
+})
 }

View File

@@ -1,6 +1,6 @@
 import { app, BrowserWindow } from 'electron'
-import { join } from 'path'
+import { join, resolve } from 'path'
 /**
 * Managers
 **/
@@ -39,15 +39,44 @@ const quickAskUrl = `${mainUrl}/search`
 const gotTheLock = app.requestSingleInstanceLock()
+if (process.defaultApp) {
+if (process.argv.length >= 2) {
+app.setAsDefaultProtocolClient('jan', process.execPath, [
+resolve(process.argv[1]),
+])
+}
+} else {
+app.setAsDefaultProtocolClient('jan')
+}
+const createMainWindow = () => {
+const startUrl = app.isPackaged ? `file://${mainPath}` : mainUrl
+windowManager.createMainWindow(preloadPath, startUrl)
+}
 app
 .whenReady()
 .then(() => {
 if (!gotTheLock) {
 app.quit()
 throw new Error('Another instance of the app is already running')
+} else {
+app.on(
+'second-instance',
+(_event, commandLine, _workingDirectory): void => {
+if (process.platform === 'win32' || process.platform === 'linux') {
+// this is for handling deeplink on windows and linux
+// since those OS will emit second-instance instead of open-url
+const url = commandLine.pop()
+if (url) {
+windowManager.sendMainAppDeepLink(url)
+}
+}
+windowManager.showMainWindow()
+}
+)
 }
 })
+.then(setupReactDevTool)
 .then(setupCore)
 .then(createUserSpace)
 .then(migrateExtensions)
@@ -60,6 +89,7 @@ app
 .then(registerGlobalShortcuts)
 .then(() => {
 if (!app.isPackaged) {
+setupReactDevTool()
 windowManager.mainWindow?.webContents.openDevTools()
 }
 })
@@ -75,11 +105,11 @@ app
 })
 })
-app.on('second-instance', (_event, _commandLine, _workingDirectory) => {
-windowManager.showMainWindow()
+app.on('open-url', (_event, url) => {
+windowManager.sendMainAppDeepLink(url)
 })
-app.on('before-quit', function (evt) {
+app.on('before-quit', function (_event) {
 trayManager.destroyCurrentTray()
 })
@@ -104,11 +134,6 @@ function createQuickAskWindow() {
 windowManager.createQuickAskWindow(preloadPath, startUrl)
 }
-function createMainWindow() {
-const startUrl = app.isPackaged ? `file://${mainPath}` : mainUrl
-windowManager.createMainWindow(preloadPath, startUrl)
-}
 /**
 * Handles various IPC messages from the renderer process.
 */

View File

@@ -14,9 +14,9 @@ class WindowManager {
 private _quickAskWindowVisible = false
 private _mainWindowVisible = false
+private deeplink: string | undefined
 /**
 * Creates a new window instance.
-* @param {Electron.BrowserWindowConstructorOptions} options - The options to create the window with.
 * @returns The created window instance.
 */
 createMainWindow(preloadPath: string, startUrl: string) {
@@ -29,6 +29,17 @@ class WindowManager {
 },
 })
+if (process.platform === 'win32' || process.platform === 'linux') {
+/// This is work around for windows deeplink.
+/// second-instance event is not fired when app is not open, so the app
+/// does not received the deeplink.
+const commandLine = process.argv.slice(1)
+if (commandLine.length > 0) {
+const url = commandLine[0]
+this.sendMainAppDeepLink(url)
+}
+}
 /* Load frontend app to the window */
 this.mainWindow.loadURL(startUrl)
@@ -123,6 +134,22 @@
 )
 }
+/**
+* Try to send the deep link to the main app.
+*/
+sendMainAppDeepLink(url: string): void {
+this.deeplink = url
+const interval = setInterval(() => {
+if (!this.deeplink) clearInterval(interval)
+const mainWindow = this.mainWindow
+if (mainWindow) {
+mainWindow.webContents.send(AppEvent.onDeepLink, this.deeplink)
+if (mainWindow.isMinimized()) mainWindow.restore()
+mainWindow.focus()
+}
+}, 500)
+}
 cleanUp(): void {
 if (!this.mainWindow?.isDestroyed()) {
 this.mainWindow?.close()
@@ -137,6 +164,13 @@
 this._quickAskWindowVisible = false
 }
 }
+/**
+* Acknowledges that the window has received a deep link. We can remove it.
+*/
+ackDeepLink() {
+this.deeplink = undefined
+}
 }
 export const windowManager = new WindowManager()
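The renderer side of this handshake is not part of the diff; a sketch of what it could look like, assuming a preload bridge that exposes `ipcRenderer` (the string values match the `AppEvent.onDeepLink` and `NativeRoute.ackDeepLink` entries added above):

```typescript
// Renderer-side sketch (not part of this PR): consume the deep link and ack it
// so windowManager stops re-sending it on its 500 ms interval.
import { ipcRenderer } from 'electron'

ipcRenderer.on('onDeepLink', (_event, url: string) => {
  // Handle the jan:// URL, e.g. route to the screen it refers to.
  console.log('Received deep link:', url)
  // Acknowledge receipt; the main process then clears its stored deeplink.
  ipcRenderer.invoke('ackDeepLink')
})
```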

View File

@@ -61,6 +61,14 @@
 "include": "scripts/uninstaller.nsh",
 "deleteAppDataOnUninstall": true
 },
+"protocols": [
+{
+"name": "Jan",
+"schemes": [
+"jan"
+]
+}
+],
 "artifactName": "jan-${os}-${arch}-${version}.${ext}"
 },
 "scripts": {
@@ -96,7 +104,7 @@
 "request": "^2.88.2",
 "request-progress": "^3.0.0",
 "ulidx": "^2.3.0",
-"@nut-tree/nut-js": "^4.0.0"
+"@kirillvakalov/nut-tree__nut-js": "4.2.1-2"
 },
 "devDependencies": {
 "@electron/notarize": "^2.1.0",

View File

@@ -1,7 +1,4 @@
-import { app } from 'electron'
 export const setupReactDevTool = async () => {
-if (!app.isPackaged) {
-// Which means you're running from source code
 const { default: installExtension, REACT_DEVELOPER_TOOLS } = await import(
 'electron-devtools-installer'
@@ -14,4 +11,3 @@ export const setupReactDevTool = async () => {
 // Only log the error and don't throw it because it's not critical
 }
 }
-}

View File

@@ -1,5 +1,5 @@
 import { clipboard, globalShortcut } from 'electron'
-import { keyboard, Key } from '@nut-tree/nut-js'
+import { keyboard, Key } from "@kirillvakalov/nut-tree__nut-js"
 /**
 * Gets selected text by synthesizing the keyboard shortcut

View File

@@ -10,11 +10,12 @@ export function toolRetrievalUpdateTextSplitter(
 }
 export async function toolRetrievalIngestNewDocument(
 file: string,
+model: string,
 engine: string
 ) {
 const filePath = path.join(getJanDataFolderPath(), normalizeFilePath(file))
 const threadPath = path.dirname(filePath.replace('files', ''))
-retrieval.updateEmbeddingEngine(engine)
+retrieval.updateEmbeddingEngine(model, engine)
 return retrieval
 .ingestAgentKnowledge(filePath, `${threadPath}/memory`)
 .catch((err) => {

View File

@@ -28,14 +28,14 @@ export class Retrieval {
 })
 }
-public updateEmbeddingEngine(engine: string): void {
+public updateEmbeddingEngine(model: string, engine: string): void {
 // Engine settings are not compatible with the current embedding model params
 // Switch case manually for now
 if (engine === 'nitro') {
 this.embeddingModel = new OpenAIEmbeddings(
-{ openAIApiKey: 'nitro-embedding' },
+{ openAIApiKey: 'nitro-embedding', model },
 // TODO: Raw settings
-{ basePath: 'http://127.0.0.1:3928/v1' }
+{ basePath: 'http://127.0.0.1:3928/v1' },
 )
 } else {
 // Fallback to OpenAI Settings
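With the new signature the caller forwards the embedding model id together with the engine name; a minimal usage sketch (the model id here is illustrative, not taken from the PR):

```typescript
// Sketch: the embedding model id is now passed through to OpenAIEmbeddings,
// so the local nitro/cortex endpoint knows which embedding model to serve.
retrieval.updateEmbeddingEngine('nomic-embed-text-v1.5', 'nitro') // illustrative model id
```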

View File

@@ -36,6 +36,7 @@ export class RetrievalTool extends InferenceTool {
 NODE,
 'toolRetrievalIngestNewDocument',
 docFile,
+data.model?.id,
 data.model?.engine
 )
 } else {

View File

@@ -0,0 +1,79 @@
# Anthropic Engine Extension
Created using Jan extension example
# Create a Jan Extension using Typescript
Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
## Create Your Own Extension
To create your own extension, you can use this repository as a template! Just follow the below instructions:
1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository
## Initial Setup
After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!
1. :hammer_and_wrench: Install the dependencies
```bash
npm install
```
1. :building_construction: Package the TypeScript for distribution
```bash
npm run bundle
```
1. :white_check_mark: Check your artifact
There will be a tgz file in your extension directory now
## Update the Extension Metadata
The [`package.json`](package.json) file defines metadata about your extension, such as
extension name, main entry, description and version.
When you copy this repository, update `package.json` with the name, description for your extension.
## Update the Extension Code
The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.
There are a few things to keep in mind when writing your extension code:
- Most Jan Extension functions are processed asynchronously.
In `index.ts`, you will see that the extension function will return a `Promise<any>`.
```typescript
import { events, MessageEvent, MessageRequest } from '@janhq/core'
function onStart(): Promise<any> {
return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.inference(data)
)
}
```
For more information about the Jan Extension Core module, see the
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).
So, what are you waiting for? Go ahead and start customizing your extension!

View File

@@ -0,0 +1,43 @@
{
"name": "@janhq/inference-anthropic-extension",
"productName": "Anthropic Inference Engine",
"version": "1.0.0",
"description": "This extension enables Anthropic chat completion API calls",
"main": "dist/index.js",
"module": "dist/module.js",
"engine": "anthropic",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"scripts": {
"build": "tsc -b . && webpack --config webpack.config.js",
"build:publish": "rimraf *.tgz --glob && yarn build && npm pack && cpx *.tgz ../../pre-install",
"sync:core": "cd ../.. && yarn build:core && cd extensions && rm yarn.lock && cd inference-anthropic-extension && yarn && yarn build:publish"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/module.js"
},
"devDependencies": {
"cpx": "^1.5.0",
"rimraf": "^3.0.2",
"webpack": "^5.88.2",
"webpack-cli": "^5.1.4",
"ts-loader": "^9.5.0"
},
"dependencies": {
"@janhq/core": "file:../../core",
"fetch-retry": "^5.0.6",
"ulidx": "^2.3.0"
},
"engines": {
"node": ">=18.0.0"
},
"files": [
"dist/*",
"package.json",
"README.md"
],
"bundleDependencies": [
"fetch-retry"
]
}

View File

@@ -0,0 +1,83 @@
[
{
"sources": [
{
"url": "https://www.anthropic.com/"
}
],
"id": "claude-3-opus-20240229",
"object": "model",
"name": "Claude 3 Opus",
"version": "1.0",
"description": "Claude 3 Opus is a powerful model suitables for highly complex task.",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7,
"stream": false
},
"metadata": {
"author": "Anthropic",
"tags": [
"General",
"Big Context Length"
]
},
"engine": "anthropic"
},
{
"sources": [
{
"url": "https://www.anthropic.com/"
}
],
"id": "claude-3-sonnet-20240229",
"object": "model",
"name": "Claude 3 Sonnet",
"version": "1.0",
"description": "Claude 3 Sonnet is an ideal model balance of intelligence and speed for enterprise workloads.",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7,
"stream": false
},
"metadata": {
"author": "Anthropic",
"tags": [
"General",
"Big Context Length"
]
},
"engine": "anthropic"
},
{
"sources": [
{
"url": "https://www.anthropic.com/"
}
],
"id": "claude-3-haiku-20240307",
"object": "model",
"name": "Claude 3 Haiku",
"version": "1.0",
"description": "Claude 3 Haiku is the fastest model provides near-instant responsiveness.",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7,
"stream": false
},
"metadata": {
"author": "Anthropic",
"tags": [
"General",
"Big Context Length"
]
},
"engine": "anthropic"
}
]

View File

@@ -0,0 +1,23 @@
[
{
"key": "chat-completions-endpoint",
"title": "Chat Completions Endpoint",
"description": "The endpoint to use for chat completions. See the [Anthropic API documentation](https://docs.anthropic.com/claude/docs/intro-to-claude) for more information.",
"controllerType": "input",
"controllerProps": {
"placeholder": "https://api.anthropic.com/v1/messages",
"value": "https://api.anthropic.com/v1/messages"
}
},
{
"key": "anthropic-api-key",
"title": "API Key",
"description": "The Anthropic API uses API keys for authentication. Visit your [API Keys](https://console.anthropic.com/settings/keys) page to retrieve the API key you'll use in your requests.",
"controllerType": "input",
"controllerProps": {
"placeholder": "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
"value": "",
"type": "password"
}
}
]

View File

@@ -0,0 +1,124 @@
/**
* @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
* @version 1.0.0
* @module inference-anthropic-extension/src/index
*/
import { RemoteOAIEngine } from '@janhq/core'
import { PayloadType } from '@janhq/core'
import { ChatCompletionRole } from '@janhq/core'
declare const SETTINGS: Array<any>
declare const MODELS: Array<any>
enum Settings {
apiKey = 'anthropic-api-key',
chatCompletionsEndPoint = 'chat-completions-endpoint',
}
type AnthropicPayloadType = {
model?: string
max_tokens?: number
messages?: Array<{ role: string; content: string }>
}
/**
* A class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
*/
export default class JanInferenceAnthropicExtension extends RemoteOAIEngine {
inferenceUrl: string = ''
provider: string = 'anthropic'
maxTokens: number = 4096
override async onLoad(): Promise<void> {
super.onLoad()
// Register Settings
this.registerSettings(SETTINGS)
this.registerModels(MODELS)
this.apiKey = await this.getSetting<string>(Settings.apiKey, '')
this.inferenceUrl = await this.getSetting<string>(
Settings.chatCompletionsEndPoint,
''
)
if (this.inferenceUrl.length === 0) {
SETTINGS.forEach((setting) => {
if (setting.key === Settings.chatCompletionsEndPoint) {
this.inferenceUrl = setting.controllerProps.value as string
}
})
}
}
// Override the headers method to include the x-API-key in the request headers
override async headers(): Promise<HeadersInit> {
return {
'Content-Type': 'application/json',
'x-api-key': this.apiKey,
'anthropic-version': '2023-06-01',
}
}
onSettingUpdate<T>(key: string, value: T): void {
if (key === Settings.apiKey) {
this.apiKey = value as string
} else if (key === Settings.chatCompletionsEndPoint) {
if (typeof value !== 'string') return
if (value.trim().length === 0) {
SETTINGS.forEach((setting) => {
if (setting.key === Settings.chatCompletionsEndPoint) {
this.inferenceUrl = setting.controllerProps.value as string
}
})
} else {
this.inferenceUrl = value
}
}
}
// Override the transformPayload method to convert the payload to the required format
transformPayload = (payload: PayloadType): AnthropicPayloadType => {
if (!payload.messages || payload.messages.length === 0) {
return { max_tokens: this.maxTokens, messages: [], model: payload.model }
}
const convertedData: AnthropicPayloadType = {
max_tokens: this.maxTokens,
messages: [],
model: payload.model,
}
payload.messages.forEach((item, index) => {
if (item.role === ChatCompletionRole.User) {
convertedData.messages.push({
role: 'user',
content: item.content as string,
})
} else if (item.role === ChatCompletionRole.Assistant) {
convertedData.messages.push({
role: 'assistant',
content: item.content as string,
})
}
})
return convertedData
}
// Override the transformResponse method to convert the response to the required format
transformResponse = (data: any): string => {
if (data.content && data.content.length > 0 && data.content[0].text) {
return data.content[0].text
} else {
console.error('Invalid response format:', data)
return ''
}
}
}
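To make the two overrides above concrete, here is what they produce for a minimal exchange (shapes are illustrative and only show the fields the code above actually reads):

```typescript
// Illustrative input/output for transformPayload and transformResponse above.
const payload = {
  model: 'claude-3-haiku-20240307',
  messages: [{ role: 'user', content: 'Hello' }],
}
// transformPayload(payload) =>
// {
//   max_tokens: 4096,
//   model: 'claude-3-haiku-20240307',
//   messages: [{ role: 'user', content: 'Hello' }]
// }

// A (truncated) Anthropic response body:
const response = { content: [{ type: 'text', text: 'Hi there!' }] }
// transformResponse(response) => 'Hi there!'
```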

View File

@@ -0,0 +1,14 @@
{
"compilerOptions": {
"target": "es2016",
"module": "ES6",
"moduleResolution": "node",
"outDir": "./dist",
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"strict": false,
"skipLibCheck": true,
"rootDir": "./src"
},
"include": ["./src"]
}

View File

@@ -0,0 +1,37 @@
const webpack = require('webpack')
const packageJson = require('./package.json')
const settingJson = require('./resources/settings.json')
const modelsJson = require('./resources/models.json')
module.exports = {
experiments: { outputModule: true },
entry: './src/index.ts', // Adjust the entry point to match your project's main file
mode: 'production',
module: {
rules: [
{
test: /\.tsx?$/,
use: 'ts-loader',
exclude: /node_modules/,
},
],
},
plugins: [
new webpack.DefinePlugin({
MODELS: JSON.stringify(modelsJson),
SETTINGS: JSON.stringify(settingJson),
ENGINE: JSON.stringify(packageJson.engine),
}),
],
output: {
filename: 'index.js', // Adjust the output file name as needed
library: { type: 'module' }, // Specify ESM output format
},
resolve: {
extensions: ['.ts', '.js'],
},
optimization: {
minimize: false,
},
// Add loaders and other configuration as needed for your project
}

View File

@@ -19,7 +19,37 @@
 },
 "metadata": {
 "author": "Cohere",
-"tags": ["General", "Big Context Length"]
+"tags": [
+"General",
+"Big Context Length"
+]
+},
+"engine": "cohere"
+},
+{
+"sources": [
+{
+"url": "https://cohere.com"
+}
+],
+"id": "command-r",
+"object": "model",
+"name": "Command R",
+"version": "1.0",
+"description": "Command R is an instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents.",
+"format": "api",
+"settings": {},
+"parameters": {
+"max_tokens": 128000,
+"temperature": 0.7,
+"stream": false
+},
+"metadata": {
+"author": "Cohere",
+"tags": [
+"General",
+"Big Context Length"
+]
 },
 "engine": "cohere"
 }

View File

@@ -12,7 +12,7 @@
 {
 "key": "cohere-api-key",
 "title": "API Key",
-"description": "The Cohere API uses API keys for authentication. Visit your [API Keys](https://platform.openai.com/account/api-keys) page to retrieve the API key you'll use in your requests.",
+"description": "The Cohere API uses API keys for authentication. Visit your [API Keys](https://dashboard.cohere.com/api-keys) page to retrieve the API key you'll use in your requests.",
 "controllerType": "input",
 "controllerProps": {
 "placeholder": "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",

View File

@@ -3,7 +3,7 @@
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 * @version 1.0.0
-* @module inference-openai-extension/src/index
+* @module inference-cohere-extension/src/index
 */
 import { RemoteOAIEngine } from '@janhq/core'
@@ -26,8 +26,8 @@ enum RoleType {
 type CoherePayloadType = {
 chat_history?: Array<{ role: RoleType; message: string }>
-message?: string,
-preamble?: string,
+message?: string
+preamble?: string
 }
 /**
@@ -82,18 +82,24 @@ export default class JanInferenceCohereExtension extends RemoteOAIEngine {
 if (payload.messages.length === 0) {
 return {}
 }
+const { messages, ...params } = payload
 const convertedData: CoherePayloadType = {
+...params,
 chat_history: [],
 message: '',
 }
-payload.messages.forEach((item, index) => {
+messages.forEach((item, index) => {
 // Assign the message of the last item to the `message` property
-if (index === payload.messages.length - 1) {
+if (index === messages.length - 1) {
 convertedData.message = item.content as string
 return
 }
 if (item.role === ChatCompletionRole.User) {
-convertedData.chat_history.push({ role: RoleType.user, message: item.content as string})
+convertedData.chat_history.push({
+role: RoleType.user,
+message: item.content as string,
+})
 } else if (item.role === ChatCompletionRole.Assistant) {
 convertedData.chat_history.push({
 role: RoleType.chatbot,
@@ -106,5 +112,7 @@ export default class JanInferenceCohereExtension extends RemoteOAIEngine {
 return convertedData
 }
-transformResponse = (data: any) => data.text
+transformResponse = (data: any) => {
+return typeof data === 'object' ? data.text : JSON.parse(data).text ?? ''
+}
 }

View File

@@ -1,7 +1,7 @@
 {
 "name": "@janhq/inference-groq-extension",
 "productName": "Groq Inference Engine",
-"version": "1.0.0",
+"version": "1.0.1",
 "description": "This extension enables fast Groq chat completion API calls",
 "main": "dist/index.js",
 "module": "dist/module.js",

View File

@@ -8,22 +8,25 @@
 "id": "llama3-70b-8192",
 "object": "model",
 "name": "Groq Llama 3 70b",
-"version": "1.0",
+"version": "1.1",
 "description": "Groq Llama 3 70b with supercharged speed!",
 "format": "api",
-"settings": {
-"text_model": false
-},
+"settings": {},
 "parameters": {
 "max_tokens": 8192,
 "temperature": 0.7,
-"top_p": 1,
-"stop": null,
-"stream": true
+"top_p": 0.95,
+"stream": true,
+"stop": [],
+"frequency_penalty": 0,
+"presence_penalty": 0
 },
 "metadata": {
 "author": "Meta",
-"tags": ["General", "Big Context Length"]
+"tags": [
+"General",
+"Big Context Length"
+]
 },
 "engine": "groq"
 },
@@ -36,22 +39,25 @@
 "id": "llama3-8b-8192",
 "object": "model",
 "name": "Groq Llama 3 8b",
-"version": "1.0",
+"version": "1.1",
 "description": "Groq Llama 3 8b with supercharged speed!",
 "format": "api",
-"settings": {
-"text_model": false
-},
+"settings": {},
 "parameters": {
 "max_tokens": 8192,
 "temperature": 0.7,
-"top_p": 1,
-"stop": null,
-"stream": true
+"top_p": 0.95,
+"stream": true,
+"stop": [],
+"frequency_penalty": 0,
+"presence_penalty": 0
 },
 "metadata": {
 "author": "Meta",
-"tags": ["General", "Big Context Length"]
+"tags": [
+"General",
+"Big Context Length"
+]
 },
 "engine": "groq"
 },
@@ -64,50 +70,24 @@
 "id": "gemma-7b-it",
 "object": "model",
 "name": "Groq Gemma 7b Instruct",
-"version": "1.0",
+"version": "1.1",
 "description": "Groq Gemma 7b Instruct with supercharged speed!",
 "format": "api",
-"settings": {
-"text_model": false
-},
+"settings": {},
 "parameters": {
-"max_tokens": 4096,
+"max_tokens": 8192,
 "temperature": 0.7,
-"top_p": 1,
-"stop": null,
-"stream": true
+"top_p": 0.95,
+"stream": true,
+"stop": [],
+"frequency_penalty": 0,
+"presence_penalty": 0
 },
 "metadata": {
 "author": "Google",
-"tags": ["General"]
-},
-"engine": "groq"
-},
-{
-"sources": [
-{
-"url": "https://groq.com"
-}
-],
-"id": "llama2-70b-4096",
-"object": "model",
-"name": "Groq Llama 2 70b",
-"version": "1.0",
-"description": "Groq Llama 2 70b with supercharged speed!",
-"format": "api",
-"settings": {
-"text_model": false
-},
-"parameters": {
-"max_tokens": 4096,
-"temperature": 0.7,
-"top_p": 1,
-"stop": null,
-"stream": true
-},
-"metadata": {
-"author": "Meta",
-"tags": ["General", "Big Context Length"]
+"tags": [
+"General"
+]
 },
 "engine": "groq"
 },
@@ -120,22 +100,25 @@
 "id": "mixtral-8x7b-32768",
 "object": "model",
 "name": "Groq Mixtral 8x7b Instruct",
-"version": "1.0",
+"version": "1.1",
 "description": "Groq Mixtral 8x7b Instruct is Mixtral with supercharged speed!",
 "format": "api",
-"settings": {
-"text_model": false
-},
+"settings": {},
 "parameters": {
-"max_tokens": 4096,
+"max_tokens": 32768,
 "temperature": 0.7,
-"top_p": 1,
-"stop": null,
-"stream": true
+"top_p": 0.95,
+"stream": true,
+"stop": [],
+"frequency_penalty": 0,
+"presence_penalty": 0
 },
 "metadata": {
 "author": "Mistral",
-"tags": ["General", "Big Context Length"]
+"tags": [
+"General",
+"Big Context Length"
+]
 },
 "engine": "groq"
 }

View File

@@ -0,0 +1,79 @@
# Martian Engine Extension
Created using Jan extension example
# Create a Jan Extension using Typescript
Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
## Create Your Own Extension
To create your own extension, you can use this repository as a template! Just follow the below instructions:
1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository
## Initial Setup
After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!
1. :hammer_and_wrench: Install the dependencies
```bash
npm install
```
1. :building_construction: Package the TypeScript for distribution
```bash
npm run bundle
```
1. :white_check_mark: Check your artifact
There will be a tgz file in your extension directory now
## Update the Extension Metadata
The [`package.json`](package.json) file defines metadata about your extension, such as
extension name, main entry, description and version.
When you copy this repository, update `package.json` with the name, description for your extension.
## Update the Extension Code
The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.
There are a few things to keep in mind when writing your extension code:
- Most Jan Extension functions are processed asynchronously.
In `index.ts`, you will see that the extension function will return a `Promise<any>`.
```typescript
import { events, MessageEvent, MessageRequest } from '@janhq/core'
function onStart(): Promise<any> {
return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.inference(data)
)
}
```
For more information about the Jan Extension Core module, see the
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).
So, what are you waiting for? Go ahead and start customizing your extension!

View File

@@ -0,0 +1,42 @@
{
"name": "@janhq/inference-martian-extension",
"productName": "Martian Inference Engine",
"version": "1.0.1",
"description": "This extension enables Martian chat completion API calls",
"main": "dist/index.js",
"module": "dist/module.js",
"engine": "martian",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"scripts": {
"build": "tsc -b . && webpack --config webpack.config.js",
"build:publish": "rimraf *.tgz --glob && yarn build && npm pack && cpx *.tgz ../../pre-install"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/module.js"
},
"devDependencies": {
"cpx": "^1.5.0",
"rimraf": "^3.0.2",
"webpack": "^5.88.2",
"webpack-cli": "^5.1.4",
"ts-loader": "^9.5.0"
},
"dependencies": {
"@janhq/core": "file:../../core",
"fetch-retry": "^5.0.6",
"ulidx": "^2.3.0"
},
"engines": {
"node": ">=18.0.0"
},
"files": [
"dist/*",
"package.json",
"README.md"
],
"bundleDependencies": [
"fetch-retry"
]
}

View File

@@ -0,0 +1,32 @@
[
{
"sources": [
{
"url": "https://withmartian.com/"
}
],
"id": "router",
"object": "model",
"name": "Martian Model Router",
"version": "1.0",
"description": "Martian Model Router dynamically routes requests to the best LLM in real-time",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Martian",
"tags": [
"General"
]
},
"engine": "martian"
}
]

View File

@@ -0,0 +1,23 @@
[
{
"key": "chat-completions-endpoint",
"title": "Chat Completions Endpoint",
"description": "The endpoint to use for chat completions. See the [Martian API documentation](https://docs.withmartian.com/martian-model-router/api-reference/get-chat-completions) for more information.",
"controllerType": "input",
"controllerProps": {
"placeholder": "https://withmartian.com/api/openai/v1/chat/completions",
"value": "https://withmartian.com/api/openai/v1/chat/completions"
}
},
{
"key": "martian-api-key",
"title": "API Key",
"description": "The Martian API uses API keys for authentication. Visit your [API Keys](https://withmartian.com/dashboard) page to retrieve the API key you'll use in your requests.",
"controllerType": "input",
"controllerProps": {
"placeholder": "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
"value": "",
"type": "password"
}
}
]

View File

@@ -0,0 +1,66 @@
/**
* @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
* @version 1.0.0
* @module inference-martian-extension/src/index
*/
import { RemoteOAIEngine, SettingComponentProps } from '@janhq/core'
declare const SETTINGS: Array<any>
declare const MODELS: Array<any>
enum Settings {
apiKey = 'martian-api-key',
chatCompletionsEndPoint = 'chat-completions-endpoint',
}
/**
* A class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
*/
export default class JanInferenceMartianExtension extends RemoteOAIEngine {
inferenceUrl: string = ''
provider: string = 'martian'
override async onLoad(): Promise<void> {
super.onLoad()
// Register Settings
this.registerSettings(SETTINGS)
this.registerModels(MODELS)
this.apiKey = await this.getSetting<string>(Settings.apiKey, '')
this.inferenceUrl = await this.getSetting<string>(
Settings.chatCompletionsEndPoint,
''
)
if (this.inferenceUrl.length === 0) {
SETTINGS.forEach((setting) => {
if (setting.key === Settings.chatCompletionsEndPoint) {
this.inferenceUrl = setting.controllerProps.value as string
}
})
}
}
onSettingUpdate<T>(key: string, value: T): void {
if (key === Settings.apiKey) {
this.apiKey = value as string
} else if (key === Settings.chatCompletionsEndPoint) {
if (typeof value !== 'string') return
if (value.trim().length === 0) {
SETTINGS.forEach((setting) => {
if (setting.key === Settings.chatCompletionsEndPoint) {
this.inferenceUrl = setting.controllerProps.value as string
}
})
} else {
this.inferenceUrl = value
}
}
}
}

View File

@@ -0,0 +1,14 @@
{
"compilerOptions": {
"target": "es2016",
"module": "ES6",
"moduleResolution": "node",
"outDir": "./dist",
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"strict": false,
"skipLibCheck": true,
"rootDir": "./src"
},
"include": ["./src"]
}

View File

@@ -0,0 +1,37 @@
const webpack = require('webpack')
const packageJson = require('./package.json')
const settingJson = require('./resources/settings.json')
const modelsJson = require('./resources/models.json')
module.exports = {
experiments: { outputModule: true },
entry: './src/index.ts', // Adjust the entry point to match your project's main file
mode: 'production',
module: {
rules: [
{
test: /\.tsx?$/,
use: 'ts-loader',
exclude: /node_modules/,
},
],
},
plugins: [
new webpack.DefinePlugin({
MODELS: JSON.stringify(modelsJson),
SETTINGS: JSON.stringify(settingJson),
ENGINE: JSON.stringify(packageJson.engine),
}),
],
output: {
filename: 'index.js', // Adjust the output file name as needed
library: { type: 'module' }, // Specify ESM output format
},
resolve: {
extensions: ['.ts', '.js'],
},
optimization: {
minimize: false,
},
// Add loaders and other configuration as needed for your project
}

View File

@@ -1,7 +1,7 @@
 {
 "name": "@janhq/inference-mistral-extension",
 "productName": "MistralAI Inference Engine",
-"version": "1.0.0",
+"version": "1.0.1",
 "description": "This extension enables Mistral chat completion API calls",
 "main": "dist/index.js",
 "module": "dist/module.js",

View File

@@ -8,48 +8,20 @@
 "id": "mistral-small-latest",
 "object": "model",
 "name": "Mistral Small",
-"version": "1.0",
-"description": "Mistral Small is the ideal choice for simpe tasks that one can do in builk - like Classification, Customer Support, or Text Generation. It offers excellent performance at an affordable price point.",
+"version": "1.1",
+"description": "Mistral Small is the ideal choice for simple tasks (Classification, Customer Support, or Text Generation) at an affordable price.",
 "format": "api",
 "settings": {},
 "parameters": {
-"max_tokens": 4096,
-"temperature": 0.7
+"max_tokens": 32000,
+"temperature": 0.7,
+"top_p": 0.95,
+"stream": true
 },
 "metadata": {
 "author": "Mistral",
 "tags": [
-"Classification",
-"Customer Support",
-"Text Generation"
-]
-},
-"engine": "mistral"
-},
-{
-"sources": [
-{
-"url": "https://docs.mistral.ai/api/"
-}
-],
-"id": "mistral-medium-latest",
-"object": "model",
-"name": "Mistral Medium",
-"version": "1.0",
-"description": "Mistral Medium is the ideal for intermediate tasks that require moderate reasoning - like Data extraction, Summarizing a Document, Writing a Job Description, or Writing Product Descriptions. Mistral Medium strikes a balance between performance and capability, making it suitable for a wide range of tasks that only require language transformaion",
-"format": "api",
-"settings": {},
-"parameters": {
-"max_tokens": 4096,
-"temperature": 0.7
-},
-"metadata": {
-"author": "Mistral",
-"tags": [
-"Data extraction",
-"Summarizing a Document",
-"Writing a Job Description",
-"Writing Product Descriptions"
+"General"
 ]
 },
 "engine": "mistral"
@@ -63,21 +35,47 @@
 "id": "mistral-large-latest",
 "object": "model",
 "name": "Mistral Large",
-"version": "1.0",
-"description": "Mistral Large is ideal for complex tasks that require large reasoning capabilities or are highly specialized - like Synthetic Text Generation, Code Generation, RAG, or Agents.",
+"version": "1.1",
+"description": "Mistral Large is ideal for complex tasks (Synthetic Text Generation, Code Generation, RAG, or Agents).",
 "format": "api",
 "settings": {},
 "parameters": {
-"max_tokens": 4096,
-"temperature": 0.7
+"max_tokens": 32000,
+"temperature": 0.7,
+"top_p": 0.95,
+"stream": true
 },
 "metadata": {
 "author": "Mistral",
 "tags": [
-"Text Generation",
-"Code Generation",
-"RAG",
-"Agents"
-]
+"General"
+]
+},
+"engine": "mistral"
+},
+{
+"sources": [
+{
+"url": "https://docs.mistral.ai/api/"
+}
+],
+"id": "open-mixtral-8x22b",
+"object": "model",
+"name": "Mixtral 8x22B",
+"version": "1.1",
+"description": "Mixtral 8x22B is a high-performance, cost-effective model designed for complex tasks.",
+"format": "api",
+"settings": {},
+"parameters": {
+"max_tokens": 32000,
+"temperature": 0.7,
+"top_p": 0.95,
+"stream": true
+},
+"metadata": {
+"author": "Mistral",
+"tags": [
+"General"
+]
 },
 "engine": "mistral"

View File

@ -0,0 +1,2 @@
bin
!version.txt

View File

@ -1 +1 @@
0.3.22 0.4.4

View File

@ -1,3 +1,3 @@
@echo off @echo off
set /p NITRO_VERSION=<./bin/version.txt set /p CORTEX_VERSION=<./bin/version.txt
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan

View File

@ -1,8 +1,8 @@
{ {
"name": "@janhq/inference-nitro-extension", "name": "@janhq/inference-cortex-extension",
"productName": "Nitro Inference Engine", "productName": "Cortex Inference Engine",
"version": "1.0.4", "version": "1.0.7",
"description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"main": "dist/index.js", "main": "dist/index.js",
"node": "dist/node/index.cjs.js", "node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>", "author": "Jan <service@jan.ai>",
@ -10,8 +10,8 @@
"scripts": { "scripts": {
"test": "jest", "test": "jest",
"build": "tsc --module commonjs && rollup -c rollup.config.ts", "build": "tsc --module commonjs && rollup -c rollup.config.ts",
"downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro", "downloadnitro:linux": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/cortex-cpp",
"downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-universal.tar.gz -o ./bin/ && mkdir -p ./bin/mac-universal && tar -zxvf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz --strip-components=1 -C ./bin/mac-universal && rm -rf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz && chmod +x ./bin/mac-universal/nitro", "downloadnitro:darwin": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-arm64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz --strip-components=1 -C ./bin/mac-arm64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz && chmod +x ./bin/mac-arm64/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-amd64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz --strip-components=1 -C ./bin/mac-amd64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz && chmod +x ./bin/mac-amd64/cortex-cpp",
"downloadnitro:win32": "download.bat", "downloadnitro:win32": "download.bat",
"downloadnitro": "run-script-os", "downloadnitro": "run-script-os",
"build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",

View File

@ -8,19 +8,20 @@
"id": "codeninja-1.0-7b", "id": "codeninja-1.0-7b",
"object": "model", "object": "model",
"name": "CodeNinja 7B Q4", "name": "CodeNinja 7B Q4",
"version": "1.0", "version": "1.1",
"description": "CodeNinja is good for coding tasks and can handle various languages including Python, C, C++, Rust, Java, JavaScript, and more.", "description": "CodeNinja is good for coding tasks and can handle various languages including Python, C, C++, Rust, Java, JavaScript, and more.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 8192,
"prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:", "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
"llama_model_path": "codeninja-1.0-openchat-7b.Q4_K_M.gguf" "llama_model_path": "codeninja-1.0-openchat-7b.Q4_K_M.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 8192,
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0
}, },
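The new ngl field here, and in the model entries that follow, sets how many transformer layers the llama.cpp-based engine is asked to offload to the GPU, alongside the enlarged ctx_len context window. A small sketch of the settings object an entry like this produces; reading ngl as a GPU layer count is an assumption based on the llama.cpp flag of the same name:

// Shape mirrored from the codeninja entry above (values copied, comments added)
const codeninjaSettings = {
  ctx_len: 8192, // context window forwarded to the engine
  ngl: 32, // layers to offload to the GPU (llama.cpp-style n_gpu_layers)
  prompt_template: 'GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:',
  llama_model_path: 'codeninja-1.0-openchat-7b.Q4_K_M.gguf',
}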

View File

@ -8,19 +8,20 @@
"id": "command-r-34b", "id": "command-r-34b",
"object": "model", "object": "model",
"name": "Command-R v01 34B Q4", "name": "Command-R v01 34B Q4",
"version": "1.3", "version": "1.4",
"description": "C4AI Command-R developed by CohereAI is optimized for a variety of use cases including reasoning, summarization, and question answering.", "description": "C4AI Command-R developed by CohereAI is optimized for a variety of use cases including reasoning, summarization, and question answering.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 131072,
"prompt_template": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", "prompt_template": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
"llama_model_path": "c4ai-command-r-v01-Q4_K_M.gguf" "llama_model_path": "c4ai-command-r-v01-Q4_K_M.gguf",
"ngl": 40
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 131072,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -8,19 +8,20 @@
"id": "deepseek-coder-1.3b", "id": "deepseek-coder-1.3b",
"object": "model", "object": "model",
"name": "Deepseek Coder 1.3B Q8", "name": "Deepseek Coder 1.3B Q8",
"version": "1.0", "version": "1.1",
"description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.", "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 16384,
"prompt_template": "### Instruction:\n{prompt}\n### Response:", "prompt_template": "### Instruction:\n{prompt}\n### Response:",
"llama_model_path": "deepseek-coder-1.3b-instruct.Q8_0.gguf" "llama_model_path": "deepseek-coder-1.3b-instruct.Q8_0.gguf",
"ngl": 24
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 16384,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -1,26 +1,27 @@
{ {
"sources": [ "sources": [
{ {
"filename": "deepseek-coder-33b-instruct.Q5_K_M.gguf", "filename": "deepseek-coder-33b-instruct.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/deepseek-coder-33B-instruct-GGUF/resolve/main/deepseek-coder-33b-instruct.Q5_K_M.gguf" "url": "https://huggingface.co/TheBloke/deepseek-coder-33B-instruct-GGUF/resolve/main/deepseek-coder-33b-instruct.Q4_K_M.gguf"
} }
], ],
"id": "deepseek-coder-34b", "id": "deepseek-coder-34b",
"object": "model", "object": "model",
"name": "Deepseek Coder 33B Q5", "name": "Deepseek Coder 33B Q4",
"version": "1.0", "version": "1.1",
"description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.", "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 16384,
"prompt_template": "### Instruction:\n{prompt}\n### Response:", "prompt_template": "### Instruction:\n{prompt}\n### Response:",
"llama_model_path": "deepseek-coder-33b-instruct.Q5_K_M.gguf" "llama_model_path": "deepseek-coder-33b-instruct.Q4_K_M.gguf",
"ngl": 62
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 16384,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -1,32 +0,0 @@
{
"sources": [
{
"url": "https://huggingface.co/TheBloke/dolphin-2_6-phi-2-GGUF/resolve/main/dolphin-2_6-phi-2.Q8_0.gguf",
"filename": "dolphin-2_6-phi-2.Q8_0.gguf"
}
],
"id": "dolphin-phi-2",
"object": "model",
"name": "Dolphin Phi-2 2.7B Q8",
"version": "1.0",
"description": "Dolphin Phi-2 is a good alternative for Phi-2 in chatting",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "dolphin-2_6-phi-2.Q8_0.gguf"
},
"parameters": {
"max_tokens": 4096,
"stop": ["<|im_end|>"]
},
"metadata": {
"author": "Cognitive Computations, Microsoft",
"tags": [
"3B",
"Finetuned"
],
"size": 2960000000
},
"engine": "nitro"
}

View File

@ -8,19 +8,20 @@
"id": "gemma-2b", "id": "gemma-2b",
"object": "model", "object": "model",
"name": "Gemma 2B Q4", "name": "Gemma 2B Q4",
"version": "1.0", "version": "1.1",
"description": "Gemma is built from the same technology with Google's Gemini.", "description": "Gemma is built from the same technology with Google's Gemini.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 8192,
"prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model", "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model",
"llama_model_path": "gemma-2b-it-q4_k_m.gguf" "llama_model_path": "gemma-2b-it-q4_k_m.gguf",
"ngl": 18
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 8192,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -8,19 +8,20 @@
"id": "gemma-7b", "id": "gemma-7b",
"object": "model", "object": "model",
"name": "Gemma 7B Q4", "name": "Gemma 7B Q4",
"version": "1.0", "version": "1.1",
"description": "Google's Gemma is built for multilingual purpose", "description": "Google's Gemma is built for multilingual purpose",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 8192,
"prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model", "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model",
"llama_model_path": "gemma-7b-it-q4_K_M.gguf" "llama_model_path": "gemma-7b-it-q4_K_M.gguf",
"ngl": 28
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 8192,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -14,7 +14,8 @@
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 4096,
"prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]", "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]",
"llama_model_path": "llama-2-70b-chat.Q4_K_M.gguf" "llama_model_path": "llama-2-70b-chat.Q4_K_M.gguf",
"ngl": 80
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,

View File

@ -14,7 +14,8 @@
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 4096,
"prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]", "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]",
"llama_model_path": "llama-2-7b-chat.Q4_K_M.gguf" "llama_model_path": "llama-2-7b-chat.Q4_K_M.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,

View File

@ -8,19 +8,20 @@
"id": "llama3-8b-instruct", "id": "llama3-8b-instruct",
"object": "model", "object": "model",
"name": "Llama 3 8B Q4", "name": "Llama 3 8B Q4",
"version": "1.0", "version": "1.1",
"description": "Meta's Llama 3 excels at general usage situations, including chat, general world knowledge, and coding.", "description": "Meta's Llama 3 excels at general usage situations, including chat, general world knowledge, and coding.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 8192, "ctx_len": 8192,
"prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", "prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"llama_model_path": "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf" "llama_model_path": "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 8192,
"stop": ["<|end_of_text|>","<|eot_id|>"], "stop": ["<|end_of_text|>","<|eot_id|>"],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -1,35 +1,38 @@
{ {
"sources": [ "sources": [
{ {
"filename": "Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf", "filename": "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf",
"url": "https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/resolve/main/Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf" "url": "https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf"
} }
], ],
"id": "hermes-pro-7b", "id": "llama3-hermes-8b",
"object": "model", "object": "model",
"name": "Hermes Pro 7B Q4", "name": "Hermes Pro Llama 3 8B Q4",
"version": "1.1", "version": "1.1",
"description": "Hermes Pro is superior in Roleplaying, Reasoning and Explaining problem.", "description": "Hermes Pro is well-designed for General chat and JSON output.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 8192,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf" "llama_model_path": "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 8192,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0
}, },
"metadata": { "metadata": {
"author": "NousResearch", "author": "NousResearch",
"tags": ["7B", "Finetuned"], "tags": [
"size": 4370000000 "7B",
"Finetuned"
],
"size": 4920000000
}, },
"engine": "nitro" "engine": "nitro"
} }

View File

@ -14,7 +14,8 @@
"settings": { "settings": {
"ctx_len": 2048, "ctx_len": 2048,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "llamacorn-1.1b-chat.Q8_0.gguf" "llama_model_path": "llamacorn-1.1b-chat.Q8_0.gguf",
"ngl": 22
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,

View File

@ -1,34 +0,0 @@
{
"sources": [
{
"filename": "miqu-1-70b.q4_k_m.gguf",
"url": "https://huggingface.co/miqudev/miqu-1-70b/resolve/main/miqu-1-70b.q4_k_m.gguf"
}
],
"id": "miqu-70b",
"object": "model",
"name": "Mistral 70B Q4",
"version": "1.0",
"description": "A leak weight of Mistral 70B model.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] {prompt} [/INST]",
"llama_model_path": "miqu-1-70b.q4_k_m.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "miqudev",
"tags": ["70B", "Foundational Model"],
"size": 26440000000
},
"engine": "nitro"
}

View File

@ -8,20 +8,21 @@
"id": "mistral-ins-7b-q4", "id": "mistral-ins-7b-q4",
"object": "model", "object": "model",
"name": "Mistral Instruct 7B Q4", "name": "Mistral Instruct 7B Q4",
"version": "1.0", "version": "1.1",
"description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding of the world.", "description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding of the world.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 32768,
"prompt_template": "[INST] {prompt} [/INST]", "prompt_template": "[INST] {prompt} [/INST]",
"llama_model_path": "mistral-7b-instruct-v0.2.Q4_K_M.gguf" "llama_model_path": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 32768,
"stop": [], "stop": ["[/INST]"],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0
}, },

View File

@ -8,19 +8,20 @@
"id": "mixtral-8x7b-instruct", "id": "mixtral-8x7b-instruct",
"object": "model", "object": "model",
"name": "Mixtral 8x7B Instruct Q4", "name": "Mixtral 8x7B Instruct Q4",
"version": "1.0", "version": "1.1",
"description": "The Mixtral-8x7B is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms 70B models on most benchmarks.", "description": "The Mixtral-8x7B is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms 70B models on most benchmarks.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 32768,
"prompt_template": "[INST] {prompt} [/INST]", "prompt_template": "[INST] {prompt} [/INST]",
"llama_model_path": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf" "llama_model_path": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf",
"ngl": 100
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 32768,
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0
}, },

View File

@ -8,19 +8,20 @@
"id": "noromaid-7b", "id": "noromaid-7b",
"object": "model", "object": "model",
"name": "Noromaid 7B Q4", "name": "Noromaid 7B Q4",
"version": "1.0", "version": "1.1",
"description": "The Noromaid 7b model is designed for role-playing with human-like behavior.", "description": "The Noromaid 7b model is designed for role-playing with human-like behavior.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 32768,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "Noromaid-7B-0.4-DPO.q4_k_m.gguf" "llama_model_path": "Noromaid-7B-0.4-DPO.q4_k_m.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 32768,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -8,19 +8,20 @@
"id": "openchat-3.5-7b", "id": "openchat-3.5-7b",
"object": "model", "object": "model",
"name": "Openchat-3.5 7B Q4", "name": "Openchat-3.5 7B Q4",
"version": "1.0", "version": "1.1",
"description": "The performance of Openchat surpasses ChatGPT-3.5 and Grok-1 across various benchmarks.", "description": "The performance of Openchat surpasses ChatGPT-3.5 and Grok-1 across various benchmarks.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 8192,
"prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:", "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
"llama_model_path": "openchat-3.5-0106.Q4_K_M.gguf" "llama_model_path": "openchat-3.5-0106.Q4_K_M.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 8192,
"stop": ["<|end_of_turn|>"], "stop": ["<|end_of_turn|>"],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -1,34 +0,0 @@
{
"sources": [
{
"filename": "openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf",
"url": "https://huggingface.co/janhq/openhermes-2.5-neural-chat-v3-3-slerp-GGUF/resolve/main/openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf"
}
],
"id": "openhermes-neural-7b",
"object": "model",
"name": "OpenHermes Neural 7B Q4",
"version": "1.1",
"description": "OpenHermes Neural is a merged model using the TIES method. It performs well in various benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Intel, Jan",
"tags": ["7B", "Merged"],
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/openhermes-neural-7b/cover.png"
},
"engine": "nitro"
}

View File

@ -13,7 +13,7 @@
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 4096,
"prompt_template": "<|system|>\n{system_message}<|end|>\n<|user|>\n{prompt}<|end|>\n<|assistant|>\n", "prompt_template": "<|user|>\n{prompt}<|end|>\n<|assistant|>\n",
"llama_model_path": "Phi-3-mini-4k-instruct-q4.gguf" "llama_model_path": "Phi-3-mini-4k-instruct-q4.gguf"
}, },
"parameters": { "parameters": {

View File

@ -8,19 +8,20 @@
"id": "phind-34b", "id": "phind-34b",
"object": "model", "object": "model",
"name": "Phind 34B Q4", "name": "Phind 34B Q4",
"version": "1.1", "version": "1.2",
"description": "Phind 34B is the best Open-source coding model.", "description": "Phind 34B is the best Open-source coding model.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 16384,
"prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant", "prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant",
"llama_model_path": "phind-codellama-34b-v2.Q4_K_M.gguf" "llama_model_path": "phind-codellama-34b-v2.Q4_K_M.gguf",
"ngl": 48
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 16384,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -8,19 +8,20 @@
"id": "qwen-7b", "id": "qwen-7b",
"object": "model", "object": "model",
"name": "Qwen Chat 7B Q4", "name": "Qwen Chat 7B Q4",
"version": "1.0", "version": "1.1",
"description": "Qwen is optimized at Chinese, ideal for everyday tasks.", "description": "Qwen is optimized at Chinese, ideal for everyday tasks.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 32768,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "qwen1_5-7b-chat-q4_k_m.gguf" "llama_model_path": "qwen1_5-7b-chat-q4_k_m.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 32768,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -14,7 +14,8 @@
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 4096,
"prompt_template": "<|user|>\n{prompt}<|endoftext|>\n<|assistant|>", "prompt_template": "<|user|>\n{prompt}<|endoftext|>\n<|assistant|>",
"llama_model_path": "stablelm-zephyr-3b.Q8_0.gguf" "llama_model_path": "stablelm-zephyr-3b.Q8_0.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,

View File

@ -12,15 +12,16 @@
"description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.", "description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 32768,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "stealth-v1.3.Q4_K_M.gguf" "llama_model_path": "stealth-v1.3.Q4_K_M.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 32768,
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0
}, },

View File

@ -14,7 +14,8 @@
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 4096,
"prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>", "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>",
"llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf" "llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
"ngl": 22
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,

View File

@ -12,15 +12,16 @@
"description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.", "description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 32768,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "trinity-v1.2.Q4_K_M.gguf" "llama_model_path": "trinity-v1.2.Q4_K_M.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 32768,
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0
}, },

View File

@ -8,19 +8,20 @@
"id": "vistral-7b", "id": "vistral-7b",
"object": "model", "object": "model",
"name": "Vistral 7B Q4", "name": "Vistral 7B Q4",
"version": "1.0", "version": "1.1",
"description": "Vistral 7B has a deep understanding of Vietnamese.", "description": "Vistral 7B has a deep understanding of Vietnamese.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 32768,
"prompt_template": "[INST] <<SYS>>\n{system_message}\n<</SYS>>\n{prompt} [/INST]", "prompt_template": "[INST] <<SYS>>\n{system_message}\n<</SYS>>\n{prompt} [/INST]",
"llama_model_path": "vistral-7b-chat-dpo.Q4_K_M.gguf" "llama_model_path": "vistral-7b-chat-dpo.Q4_K_M.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 32768,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -12,15 +12,16 @@
"description": "WizardCoder 13B is a Python coding model. This model demonstrate high proficiency in specific domains like coding and mathematics.", "description": "WizardCoder 13B is a Python coding model. This model demonstrate high proficiency in specific domains like coding and mathematics.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 16384,
"prompt_template": "### Instruction:\n{prompt}\n### Response:", "prompt_template": "### Instruction:\n{prompt}\n### Response:",
"llama_model_path": "wizardcoder-python-13b-v1.0.Q4_K_M.gguf" "llama_model_path": "wizardcoder-python-13b-v1.0.Q4_K_M.gguf",
"ngl": 40
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 16384,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -14,7 +14,8 @@
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "yi-34b-chat.Q4_K_M.gguf" "llama_model_path": "yi-34b-chat.Q4_K_M.gguf",
"ngl": 60
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,

View File

@ -12,21 +12,17 @@ const codeninja7bJson = require('./resources/models/codeninja-1.0-7b/model.json'
const commandr34bJson = require('./resources/models/command-r-34b/model.json') const commandr34bJson = require('./resources/models/command-r-34b/model.json')
const deepseekCoder13bJson = require('./resources/models/deepseek-coder-1.3b/model.json') const deepseekCoder13bJson = require('./resources/models/deepseek-coder-1.3b/model.json')
const deepseekCoder34bJson = require('./resources/models/deepseek-coder-34b/model.json') const deepseekCoder34bJson = require('./resources/models/deepseek-coder-34b/model.json')
const dolphinPhi2Json = require('./resources/models/dolphin-phi-2/model.json')
const gemma2bJson = require('./resources/models/gemma-2b/model.json') const gemma2bJson = require('./resources/models/gemma-2b/model.json')
const gemma7bJson = require('./resources/models/gemma-7b/model.json') const gemma7bJson = require('./resources/models/gemma-7b/model.json')
const hermesPro7bJson = require('./resources/models/hermes-pro-7b/model.json')
const llama2Chat70bJson = require('./resources/models/llama2-chat-70b/model.json') const llama2Chat70bJson = require('./resources/models/llama2-chat-70b/model.json')
const llama2Chat7bJson = require('./resources/models/llama2-chat-7b/model.json') const llama2Chat7bJson = require('./resources/models/llama2-chat-7b/model.json')
const llamacorn1bJson = require('./resources/models/llamacorn-1.1b/model.json') const llamacorn1bJson = require('./resources/models/llamacorn-1.1b/model.json')
const llava13bJson = require('./resources/models/llava-13b/model.json') const llava13bJson = require('./resources/models/llava-13b/model.json')
const llava7bJson = require('./resources/models/llava-7b/model.json') const llava7bJson = require('./resources/models/llava-7b/model.json')
const miqu70bJson = require('./resources/models/miqu-70b/model.json')
const mistralIns7bq4Json = require('./resources/models/mistral-ins-7b-q4/model.json') const mistralIns7bq4Json = require('./resources/models/mistral-ins-7b-q4/model.json')
const mixtral8x7bInstructJson = require('./resources/models/mixtral-8x7b-instruct/model.json') const mixtral8x7bInstructJson = require('./resources/models/mixtral-8x7b-instruct/model.json')
const noromaid7bJson = require('./resources/models/noromaid-7b/model.json') const noromaid7bJson = require('./resources/models/noromaid-7b/model.json')
const openchat357bJson = require('./resources/models/openchat-3.5-7b/model.json') const openchat357bJson = require('./resources/models/openchat-3.5-7b/model.json')
const openhermesNeural7bJson = require('./resources/models/openhermes-neural-7b/model.json')
const phind34bJson = require('./resources/models/phind-34b/model.json') const phind34bJson = require('./resources/models/phind-34b/model.json')
const qwen7bJson = require('./resources/models/qwen-7b/model.json') const qwen7bJson = require('./resources/models/qwen-7b/model.json')
const stableZephyr3bJson = require('./resources/models/stable-zephyr-3b/model.json') const stableZephyr3bJson = require('./resources/models/stable-zephyr-3b/model.json')
@ -37,6 +33,7 @@ const vistral7bJson = require('./resources/models/vistral-7b/model.json')
const wizardcoder13bJson = require('./resources/models/wizardcoder-13b/model.json') const wizardcoder13bJson = require('./resources/models/wizardcoder-13b/model.json')
const yi34bJson = require('./resources/models/yi-34b/model.json') const yi34bJson = require('./resources/models/yi-34b/model.json')
const llama3Json = require('./resources/models/llama3-8b-instruct/model.json') const llama3Json = require('./resources/models/llama3-8b-instruct/model.json')
const llama3Hermes8bJson = require('./resources/models/llama3-hermes-8b/model.json')
export default [ export default [
{ {
@ -56,21 +53,17 @@ export default [
commandr34bJson, commandr34bJson,
deepseekCoder13bJson, deepseekCoder13bJson,
deepseekCoder34bJson, deepseekCoder34bJson,
dolphinPhi2Json,
gemma2bJson, gemma2bJson,
gemma7bJson, gemma7bJson,
hermesPro7bJson,
llama2Chat70bJson, llama2Chat70bJson,
llama2Chat7bJson, llama2Chat7bJson,
llamacorn1bJson, llamacorn1bJson,
llava13bJson, llava13bJson,
llava7bJson, llava7bJson,
miqu70bJson,
mistralIns7bq4Json, mistralIns7bq4Json,
mixtral8x7bInstructJson, mixtral8x7bInstructJson,
noromaid7bJson, noromaid7bJson,
openchat357bJson, openchat357bJson,
openhermesNeural7bJson,
phind34bJson, phind34bJson,
qwen7bJson, qwen7bJson,
stableZephyr3bJson, stableZephyr3bJson,
@ -80,13 +73,14 @@ export default [
vistral7bJson, vistral7bJson,
wizardcoder13bJson, wizardcoder13bJson,
yi34bJson, yi34bJson,
llama3Json llama3Json,
llama3Hermes8bJson
]), ]),
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson), DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
INFERENCE_URL: JSON.stringify( INFERENCE_URL: JSON.stringify(
process.env.INFERENCE_URL || process.env.INFERENCE_URL ||
'http://127.0.0.1:3928/inferences/llamacpp/chat_completion' 'http://127.0.0.1:3928/inferences/server/chat_completion'
), ),
TROUBLESHOOTING_URL: JSON.stringify( TROUBLESHOOTING_URL: JSON.stringify(
'https://jan.ai/guides/troubleshooting' 'https://jan.ai/guides/troubleshooting'

View File

@ -130,7 +130,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
const executableFolderPath = await joinPath([ const executableFolderPath = await joinPath([
janDataFolderPath, janDataFolderPath,
'engines', 'engines',
this.name ?? 'nitro', this.name ?? 'cortex-cpp',
this.version ?? '1.0.0', this.version ?? '1.0.0',
]) ])
@ -179,7 +179,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
const executableFolderPath = await joinPath([ const executableFolderPath = await joinPath([
janDataFolderPath, janDataFolderPath,
'engines', 'engines',
this.name ?? 'nitro', this.name ?? 'cortex-cpp',
this.version ?? '1.0.0', this.version ?? '1.0.0',
]) ])

View File

@ -33,9 +33,22 @@ describe('test executable nitro file', () => {
Object.defineProperty(process, 'platform', { Object.defineProperty(process, 'platform', {
value: 'darwin', value: 'darwin',
}) })
Object.defineProperty(process, 'arch', {
value: 'arm64',
})
expect(executableNitroFile(testSettings)).toEqual( expect(executableNitroFile(testSettings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`mac-universal${sep}nitro`), executablePath: expect.stringContaining(`mac-arm64${sep}cortex-cpp`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
)
Object.defineProperty(process, 'arch', {
value: 'amd64',
})
expect(executableNitroFile(testSettings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`mac-amd64${sep}cortex-cpp`),
cudaVisibleDevices: '', cudaVisibleDevices: '',
vkVisibleDevices: '', vkVisibleDevices: '',
}) })
@ -56,7 +69,7 @@ describe('test executable nitro file', () => {
} }
expect(executableNitroFile(settings)).toEqual( expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`win-cpu${sep}nitro.exe`), executablePath: expect.stringContaining(`win-cpu${sep}cortex-cpp.exe`),
cudaVisibleDevices: '', cudaVisibleDevices: '',
vkVisibleDevices: '', vkVisibleDevices: '',
}) })
@ -89,7 +102,7 @@ describe('test executable nitro file', () => {
} }
expect(executableNitroFile(settings)).toEqual( expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`win-cuda-11-7${sep}nitro.exe`), executablePath: expect.stringContaining(`win-cuda-11-7${sep}cortex-cpp.exe`),
cudaVisibleDevices: '0', cudaVisibleDevices: '0',
vkVisibleDevices: '0', vkVisibleDevices: '0',
}) })
@ -122,7 +135,7 @@ describe('test executable nitro file', () => {
} }
expect(executableNitroFile(settings)).toEqual( expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`win-cuda-12-0${sep}nitro.exe`), executablePath: expect.stringContaining(`win-cuda-12-0${sep}cortex-cpp.exe`),
cudaVisibleDevices: '0', cudaVisibleDevices: '0',
vkVisibleDevices: '0', vkVisibleDevices: '0',
}) })
@ -139,7 +152,7 @@ describe('test executable nitro file', () => {
} }
expect(executableNitroFile(settings)).toEqual( expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`linux-cpu${sep}nitro`), executablePath: expect.stringContaining(`linux-cpu${sep}cortex-cpp`),
cudaVisibleDevices: '', cudaVisibleDevices: '',
vkVisibleDevices: '', vkVisibleDevices: '',
}) })
@ -172,7 +185,7 @@ describe('test executable nitro file', () => {
} }
expect(executableNitroFile(settings)).toEqual( expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`linux-cuda-11-7${sep}nitro`), executablePath: expect.stringContaining(`linux-cuda-11-7${sep}cortex-cpp`),
cudaVisibleDevices: '0', cudaVisibleDevices: '0',
vkVisibleDevices: '0', vkVisibleDevices: '0',
}) })
@ -205,7 +218,7 @@ describe('test executable nitro file', () => {
} }
expect(executableNitroFile(settings)).toEqual( expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`linux-cuda-12-0${sep}nitro`), executablePath: expect.stringContaining(`linux-cuda-12-0${sep}cortex-cpp`),
cudaVisibleDevices: '0', cudaVisibleDevices: '0',
vkVisibleDevices: '0', vkVisibleDevices: '0',
}) })

View File

@ -1,4 +1,4 @@
import { GpuSetting, SystemInformation } from '@janhq/core' import { GpuSetting } from '@janhq/core'
import * as path from 'path' import * as path from 'path'
export interface NitroExecutableOptions { export interface NitroExecutableOptions {
@ -24,7 +24,7 @@ const os = (): string => {
return process.platform === 'win32' return process.platform === 'win32'
? 'win' ? 'win'
: process.platform === 'darwin' : process.platform === 'darwin'
? 'mac-universal' ? process.arch === 'arm64' ? 'mac-arm64' : 'mac-amd64'
: 'linux' : 'linux'
} }
@ -52,7 +52,7 @@ export const executableNitroFile = (
.join('-') .join('-')
let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
let binaryName = `nitro${extension()}` let binaryName = `cortex-cpp${extension()}`
return { return {
executablePath: path.join(__dirname, '..', 'bin', binaryFolder, binaryName), executablePath: path.join(__dirname, '..', 'bin', binaryFolder, binaryName),
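With the change above, macOS binaries are now selected per architecture instead of a single universal build, and every platform resolves to a cortex-cpp binary. A rough usage sketch; the resulting paths are indicative, not exhaustive:

import { executableNitroFile } from './execute'

// The GPU setting argument is optional, matching the call sites above
const opts = executableNitroFile(undefined)
// darwin + arm64       -> .../bin/mac-arm64/cortex-cpp
// darwin + other arch  -> .../bin/mac-amd64/cortex-cpp
// win32 without CUDA   -> .../bin/win-cpu/cortex-cpp.exe
// linux without CUDA   -> .../bin/linux-cpu/cortex-cpp
console.log(opts.executablePath, opts.cudaVisibleDevices, opts.vkVisibleDevices)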

View File

@ -34,9 +34,9 @@ const LOCAL_HOST = '127.0.0.1'
// The URL for the Nitro subprocess // The URL for the Nitro subprocess
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}` const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`
// The URL for the Nitro subprocess to load a model // The URL for the Nitro subprocess to load a model
const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel` const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
// The URL for the Nitro subprocess to validate a model // The URL for the Nitro subprocess to validate a model
const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus` const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
// The URL for the Nitro subprocess to kill itself // The URL for the Nitro subprocess to kill itself
const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy` const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
@ -50,7 +50,7 @@ const SUPPORTED_MODEL_FORMAT = '.gguf'
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
// The current model settings // The current model settings
let currentSettings: ModelSettingParams | undefined = undefined let currentSettings: ModelSettingParams & { model?: string } | undefined = undefined
/** /**
* Stops a Nitro subprocess. * Stops a Nitro subprocess.
@ -77,7 +77,7 @@ async function loadModel(
} }
if (params.model.engine !== InferenceEngine.nitro) { if (params.model.engine !== InferenceEngine.nitro) {
return Promise.reject('Not a nitro model') return Promise.reject('Not a cortex model')
} else { } else {
const nitroResourceProbe = await getSystemResourceInfo() const nitroResourceProbe = await getSystemResourceInfo()
// Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
@ -135,6 +135,7 @@ async function loadModel(
// model.settings can override the default settings // model.settings can override the default settings
...params.model.settings, ...params.model.settings,
llama_model_path, llama_model_path,
model: params.model.id,
// This is critical and requires real CPU physical core count (or performance core) // This is critical and requires real CPU physical core count (or performance core)
...(params.model.settings.mmproj && { ...(params.model.settings.mmproj && {
mmproj: path.isAbsolute(params.model.settings.mmproj) mmproj: path.isAbsolute(params.model.settings.mmproj)
@ -142,7 +143,7 @@ async function loadModel(
: path.join(modelFolder, params.model.settings.mmproj), : path.join(modelFolder, params.model.settings.mmproj),
}), }),
} }
return runNitroAndLoadModel(systemInfo) return runNitroAndLoadModel(params.model.id, systemInfo)
} }
} }
@ -152,7 +153,7 @@ async function loadModel(
* 3. Validate model status * 3. Validate model status
* @returns * @returns
*/ */
async function runNitroAndLoadModel(systemInfo?: SystemInformation) { async function runNitroAndLoadModel(modelId: string, systemInfo?: SystemInformation) {
// Gather system information for CPU physical cores and memory // Gather system information for CPU physical cores and memory
return killSubprocess() return killSubprocess()
.then(() => .then(() =>
@ -160,10 +161,10 @@ async function runNitroAndLoadModel(systemInfo?: SystemInformation) {
) )
.then(() => spawnNitroProcess(systemInfo)) .then(() => spawnNitroProcess(systemInfo))
.then(() => loadLLMModel(currentSettings)) .then(() => loadLLMModel(currentSettings))
.then(validateModelStatus) .then(() => validateModelStatus(modelId))
.catch((err) => { .catch((err) => {
// TODO: Broadcast error so app could display proper error message // TODO: Broadcast error so app could display proper error message
log(`[NITRO]::Error: ${err}`) log(`[CORTEX]::Error: ${err}`)
return { error: err } return { error: err }
}) })
} }
@ -222,7 +223,7 @@ function loadLLMModel(settings: any): Promise<Response> {
if (!settings?.ngl) { if (!settings?.ngl) {
settings.ngl = 100 settings.ngl = 100
} }
log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`) log(`[CORTEX]::Debug: Loading model with params ${JSON.stringify(settings)}`)
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, { return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
method: 'POST', method: 'POST',
headers: { headers: {
@ -234,14 +235,14 @@ function loadLLMModel(settings: any): Promise<Response> {
}) })
.then((res) => { .then((res) => {
log( log(
`[NITRO]::Debug: Load model success with response ${JSON.stringify( `[CORTEX]::Debug: Load model success with response ${JSON.stringify(
res res
)}` )}`
) )
return Promise.resolve(res) return Promise.resolve(res)
}) })
.catch((err) => { .catch((err) => {
log(`[NITRO]::Error: Load model failed with error ${err}`) log(`[CORTEX]::Error: Load model failed with error ${err}`)
return Promise.reject(err) return Promise.reject(err)
}) })
} }
@ -252,11 +253,12 @@ function loadLLMModel(settings: any): Promise<Response> {
* If the model is loaded successfully, the object is empty. * If the model is loaded successfully, the object is empty.
* If the model is not loaded successfully, the object contains an error message. * If the model is not loaded successfully, the object contains an error message.
*/ */
async function validateModelStatus(): Promise<void> { async function validateModelStatus(modelId: string): Promise<void> {
// Send a GET request to the validation URL. // Send a GET request to the validation URL.
// Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries. // Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries.
return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, { return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
method: 'GET', method: 'POST',
body: JSON.stringify({ model: modelId }),
headers: { headers: {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
}, },
@ -264,7 +266,7 @@ async function validateModelStatus(): Promise<void> {
retryDelay: 300, retryDelay: 300,
}).then(async (res: Response) => { }).then(async (res: Response) => {
log( log(
`[NITRO]::Debug: Validate model state with response ${JSON.stringify( `[CORTEX]::Debug: Validate model state with response ${JSON.stringify(
res.status res.status
)}` )}`
) )
@ -275,7 +277,7 @@ async function validateModelStatus(): Promise<void> {
// Otherwise, return an object with an error message. // Otherwise, return an object with an error message.
if (body.model_loaded) { if (body.model_loaded) {
log( log(
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify( `[CORTEX]::Debug: Validate model state success with response ${JSON.stringify(
body body
)}` )}`
) )
@ -283,7 +285,7 @@ async function validateModelStatus(): Promise<void> {
} }
} }
log( log(
-`[NITRO]::Debug: Validate model state failed with response ${JSON.stringify(
+`[CORTEX]::Debug: Validate model state failed with response ${JSON.stringify(
res.statusText
)}`
)
@ -298,7 +300,7 @@ async function validateModelStatus(): Promise<void> {
async function killSubprocess(): Promise<void> {
const controller = new AbortController()
setTimeout(() => controller.abort(), 5000)
-log(`[NITRO]::Debug: Request to kill Nitro`)
+log(`[CORTEX]::Debug: Request to kill cortex`)
const killRequest = () => {
return fetch(NITRO_HTTP_KILL_URL, {
@ -309,17 +311,17 @@ async function killSubprocess(): Promise<void> {
.then(() =>
tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
)
-.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
+.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
.catch((err) => {
log(
-`[NITRO]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
+`[CORTEX]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
)
throw 'PORT_NOT_AVAILABLE'
})
}
if (subprocess?.pid) {
-log(`[NITRO]::Debug: Killing PID ${subprocess.pid}`)
+log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`)
const pid = subprocess.pid
return new Promise((resolve, reject) => {
terminate(pid, function (err) {
@ -329,7 +331,7 @@ async function killSubprocess(): Promise<void> {
tcpPortUsed
.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
.then(() => resolve())
-.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
+.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
.catch(() => {
killRequest().then(resolve).catch(reject)
})
@ -346,22 +348,24 @@ async function killSubprocess(): Promise<void> {
* @returns A promise that resolves when the Nitro subprocess is started.
*/
function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
-log(`[NITRO]::Debug: Spawning Nitro subprocess...`)
+log(`[CORTEX]::Debug: Spawning cortex subprocess...`)
return new Promise<void>(async (resolve, reject) => {
-let binaryFolder = path.join(__dirname, '..', 'bin') // Current directory by default
let executableOptions = executableNitroFile(systemInfo?.gpuSetting)
const args: string[] = ['1', LOCAL_HOST, PORT.toString()]
// Execute the binary
log(
-`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
+`[CORTEX]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
+)
+log(
+path.parse(executableOptions.executablePath).dir
)
subprocess = spawn(
executableOptions.executablePath,
['1', LOCAL_HOST, PORT.toString()],
{
-cwd: binaryFolder,
+cwd: path.join(path.parse(executableOptions.executablePath).dir),
env: {
...process.env,
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
@ -375,15 +379,15 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
// Handle subprocess output
subprocess.stdout.on('data', (data: any) => {
-log(`[NITRO]::Debug: ${data}`)
+log(`[CORTEX]::Debug: ${data}`)
})
subprocess.stderr.on('data', (data: any) => {
-log(`[NITRO]::Error: ${data}`)
+log(`[CORTEX]::Error: ${data}`)
})
subprocess.on('close', (code: any) => {
-log(`[NITRO]::Debug: Nitro exited with code: ${code}`)
+log(`[CORTEX]::Debug: cortex exited with code: ${code}`)
subprocess = undefined
reject(`child process exited with code ${code}`)
})
@ -391,7 +395,7 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
tcpPortUsed
.waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000)
.then(() => {
-log(`[NITRO]::Debug: Nitro is ready`)
+log(`[CORTEX]::Debug: cortex is ready`)
resolve()
})
})

View File

@ -1,7 +1,7 @@
{
"name": "@janhq/inference-openai-extension",
"productName": "OpenAI Inference Engine",
-"version": "1.0.0",
+"version": "1.0.2",
"description": "This extension enables OpenAI chat completion API calls",
"main": "dist/index.js",
"module": "dist/module.js",

View File

@ -5,20 +5,27 @@
"url": "https://openai.com"
}
],
-"id": "gpt-4",
+"id": "gpt-4-turbo",
"object": "model",
-"name": "OpenAI GPT 4",
+"name": "OpenAI GPT 4 Turbo",
-"version": "1.0",
+"version": "1.2",
-"description": "OpenAI GPT 4 model is extremely good",
+"description": "OpenAI GPT 4 Turbo model is extremely good",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
-"temperature": 0.7
+"temperature": 0.7,
+"top_p": 0.95,
+"stream": true,
+"stop": [],
+"frequency_penalty": 0,
+"presence_penalty": 0
},
"metadata": {
"author": "OpenAI",
-"tags": ["General", "Big Context Length"]
+"tags": [
+"General"
+]
},
"engine": "openai"
},
@ -31,8 +38,8 @@
"id": "gpt-4-vision-preview",
"object": "model",
"name": "OpenAI GPT 4 with Vision (Preview)",
-"version": "1.0",
+"version": "1.1",
-"description": "OpenAI GPT 4 with Vision model is extremely good in preview",
+"description": "OpenAI GPT-4 Vision model features vision understanding capabilities",
"format": "api",
"settings": {
"vision_model": true,
@ -40,34 +47,16 @@
},
"parameters": {
"max_tokens": 4096,
-"temperature": 0.7
+"temperature": 0.7,
+"top_p": 0.95,
+"stream": true
},
"metadata": {
"author": "OpenAI",
-"tags": ["General", "Big Context Length", "Vision"]
+"tags": [
+"General",
+"Vision"
+]
-},
-"engine": "openai"
-},
-{
-"sources": [
-{
-"url": "https://openai.com"
-}
-],
-"id": "gpt-3.5-turbo-16k-0613",
-"object": "model",
-"name": "OpenAI GPT 3.5 Turbo 16k 0613",
-"version": "1.0",
-"description": "OpenAI GPT 3.5 Turbo 16k 0613 model is extremely good",
-"format": "api",
-"settings": {},
-"parameters": {
-"max_tokens": 4096,
-"temperature": 0.7
-},
-"metadata": {
-"author": "OpenAI",
-"tags": ["General", "Big Context Length"]
},
"engine": "openai"
},
@ -80,17 +69,54 @@
"id": "gpt-3.5-turbo",
"object": "model",
"name": "OpenAI GPT 3.5 Turbo",
-"version": "1.0",
+"version": "1.1",
-"description": "OpenAI GPT 3.5 Turbo model is extremely good",
+"description": "OpenAI GPT 3.5 Turbo model is extremely fast",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
-"temperature": 0.7
+"temperature": 0.7,
+"top_p": 0.95,
+"stream": true,
+"stop": [],
+"frequency_penalty": 0,
+"presence_penalty": 0
},
"metadata": {
"author": "OpenAI",
-"tags": ["General", "Big Context Length"]
+"tags": [
+"General"
+]
+},
+"engine": "openai"
+},
+{
+"sources": [
+{
+"url": "https://openai.com"
+}
+],
+"id": "gpt-4o",
+"object": "model",
+"name": "OpenAI GPT 4o",
+"version": "1.1",
+"description": "OpenAI GPT 4o is a new flagship model with fast speed and high quality",
+"format": "api",
+"settings": {},
+"parameters": {
+"max_tokens": 4096,
+"temperature": 0.7,
+"top_p": 0.95,
+"stream": true,
+"stop": [],
+"frequency_penalty": 0,
+"presence_penalty": 0
+},
+"metadata": {
+"author": "OpenAI",
+"tags": [
+"General"
+]
},
"engine": "openai"
}

View File

@ -0,0 +1,79 @@
# Open Router Engine Extension
Created using Jan extension example
# Create a Jan Extension using Typescript
Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
## Create Your Own Extension
To create your own extension, you can use this repository as a template! Just follow the instructions below:
1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository
## Initial Setup
After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!
1. :hammer_and_wrench: Install the dependencies
```bash
npm install
```
1. :building_construction: Package the TypeScript for distribution
```bash
npm run bundle
```
1. :white_check_mark: Check your artifact
A `.tgz` file will now be present in your extension directory
## Update the Extension Metadata
The [`package.json`](package.json) file defines metadata about your extension, such as
extension name, main entry, description and version.
When you copy this repository, update `package.json` with the name and description of your extension.
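For example, a minimal sketch of the fields you would typically change (the values below are placeholders for illustration, not part of this repository):
```json
{
  "name": "@your-scope/inference-acme-extension",
  "productName": "Acme Inference Engine",
  "version": "1.0.0",
  "description": "This extension enables Acme chat completion API calls",
  "main": "dist/index.js",
  "module": "dist/module.js"
}
```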
## Update the Extension Code
The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.
There are a few things to keep in mind when writing your extension code:
- Most Jan Extension functions are processed asynchronously.
In `index.ts`, you will see that the extension function will return a `Promise<any>`.
```typescript
import { events, MessageEvent, MessageRequest } from '@janhq/core'
function onStart(): Promise<any> {
return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.inference(data)
)
}
```
For more information about the Jan Extension Core module, see the
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).
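Building on the pattern above, the OpenRouter extension added in this release extends `RemoteOAIEngine` from `@janhq/core`. The sketch below is a hedged illustration of that shape only; the class name, provider id, setting key, and endpoint are placeholders, not the actual extension:
```typescript
import { RemoteOAIEngine } from '@janhq/core'

// SETTINGS and MODELS are injected at build time by webpack's DefinePlugin,
// as in the webpack.config.js shipped with the OpenRouter extension.
declare const SETTINGS: Array<any>
declare const MODELS: Array<any>

export default class MyRemoteInferenceExtension extends RemoteOAIEngine {
  // Placeholder endpoint; a real extension would read this from its settings.
  inferenceUrl: string = 'https://api.example.com/v1/chat/completions'
  provider: string = 'my-provider'

  override async onLoad(): Promise<void> {
    super.onLoad()
    // Register the bundled settings and model list with Jan.
    this.registerSettings(SETTINGS)
    this.registerModels(MODELS)
    // Read the API key the user entered in the extension settings.
    this.apiKey = await this.getSetting<string>('my-provider-api-key', '')
  }
}
```
The real OpenRouter extension in this PR also overrides `transformPayload` to pin the request's `model` field to `openrouter/auto` before it is sent.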
So, what are you waiting for? Go ahead and start customizing your extension!

View File

@ -0,0 +1,43 @@
{
"name": "@janhq/inference-openrouter-extension",
"productName": "OpenRouter Inference Engine",
"version": "1.0.0",
"description": "This extension enables Open Router chat completion API calls",
"main": "dist/index.js",
"module": "dist/module.js",
"engine": "openrouter",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"scripts": {
"build": "tsc -b . && webpack --config webpack.config.js",
"build:publish": "rimraf *.tgz --glob && yarn build && npm pack && cpx *.tgz ../../pre-install",
"sync:core": "cd ../.. && yarn build:core && cd extensions && rm yarn.lock && cd inference-openrouter-extension && yarn && yarn build:publish"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/module.js"
},
"devDependencies": {
"cpx": "^1.5.0",
"rimraf": "^3.0.2",
"webpack": "^5.88.2",
"webpack-cli": "^5.1.4",
"ts-loader": "^9.5.0"
},
"dependencies": {
"@janhq/core": "file:../../core",
"fetch-retry": "^5.0.6",
"ulidx": "^2.3.0"
},
"engines": {
"node": ">=18.0.0"
},
"files": [
"dist/*",
"package.json",
"README.md"
],
"bundleDependencies": [
"fetch-retry"
]
}

View File

@ -0,0 +1,28 @@
[
{
"sources": [
{
"url": "https://openrouter.ai"
}
],
"id": "open-router-auto",
"object": "model",
"name": "OpenRouter",
"version": "1.0",
"description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 1024,
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "OpenRouter",
"tags": ["General", "Big Context Length"]
},
"engine": "openrouter"
}
]

View File

@ -0,0 +1,23 @@
[
{
"key": "chat-completions-endpoint",
"title": "Chat Completions Endpoint",
"description": "The endpoint to use for chat completions. See the [OpenRouter API documentation](https://openrouter.ai/docs) for more information.",
"controllerType": "input",
"controllerProps": {
"placeholder": "https://openrouter.ai/api/v1/chat/completions",
"value": "https://openrouter.ai/api/v1/chat/completions"
}
},
{
"key": "openrouter-api-key",
"title": "API Key",
"description": "The OpenRouter API uses API keys for authentication. Visit your [API Keys](https://openrouter.ai/keys) page to retrieve the API key you'll use in your requests.",
"controllerType": "input",
"controllerProps": {
"placeholder": "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
"value": "",
"type": "password"
}
}
]

View File

@ -0,0 +1,76 @@
/**
* @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
* @version 1.0.0
* @module inference-openrouter-extension/src/index
*/
import { RemoteOAIEngine } from '@janhq/core'
import { PayloadType } from '@janhq/core'
import { ChatCompletionRole } from '@janhq/core'
declare const SETTINGS: Array<any>
declare const MODELS: Array<any>
enum Settings {
apiKey = 'openrouter-api-key',
chatCompletionsEndPoint = 'chat-completions-endpoint',
}
enum RoleType {
user = 'USER',
chatbot = 'CHATBOT',
system = 'SYSTEM',
}
/**
* A class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
*/
export default class JanInferenceOpenRouterExtension extends RemoteOAIEngine {
inferenceUrl: string = ''
provider: string = 'openrouter'
override async onLoad(): Promise<void> {
super.onLoad()
// Register Settings
this.registerSettings(SETTINGS)
this.registerModels(MODELS)
this.apiKey = await this.getSetting<string>(Settings.apiKey, '')
this.inferenceUrl = await this.getSetting<string>(
Settings.chatCompletionsEndPoint,
''
)
if (this.inferenceUrl.length === 0) {
SETTINGS.forEach((setting) => {
if (setting.key === Settings.chatCompletionsEndPoint) {
this.inferenceUrl = setting.controllerProps.value as string
}
})
}
}
onSettingUpdate<T>(key: string, value: T): void {
if (key === Settings.apiKey) {
this.apiKey = value as string
} else if (key === Settings.chatCompletionsEndPoint) {
if (typeof value !== 'string') return
if (value.trim().length === 0) {
SETTINGS.forEach((setting) => {
if (setting.key === Settings.chatCompletionsEndPoint) {
this.inferenceUrl = setting.controllerProps.value as string
}
})
} else {
this.inferenceUrl = value
}
}
}
transformPayload = (payload: PayloadType) => ({ ...payload, model: 'openrouter/auto' })
}

View File

@ -0,0 +1,14 @@
{
"compilerOptions": {
"target": "es2016",
"module": "ES6",
"moduleResolution": "node",
"outDir": "./dist",
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"strict": false,
"skipLibCheck": true,
"rootDir": "./src"
},
"include": ["./src"]
}

View File

@ -0,0 +1,37 @@
const webpack = require('webpack')
const packageJson = require('./package.json')
const settingJson = require('./resources/settings.json')
const modelsJson = require('./resources/models.json')
module.exports = {
experiments: { outputModule: true },
entry: './src/index.ts', // Adjust the entry point to match your project's main file
mode: 'production',
module: {
rules: [
{
test: /\.tsx?$/,
use: 'ts-loader',
exclude: /node_modules/,
},
],
},
plugins: [
new webpack.DefinePlugin({
MODELS: JSON.stringify(modelsJson),
SETTINGS: JSON.stringify(settingJson),
ENGINE: JSON.stringify(packageJson.engine),
}),
],
output: {
filename: 'index.js', // Adjust the output file name as needed
library: { type: 'module' }, // Specify ESM output format
},
resolve: {
extensions: ['.ts', '.js'],
},
optimization: {
minimize: false,
},
// Add loaders and other configuration as needed for your project
}

View File

@ -97,7 +97,7 @@ function unloadModel(): Promise<void> {
}
if (subprocess?.pid) {
-log(`[NITRO]::Debug: Killing PID ${subprocess.pid}`)
+log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`)
const pid = subprocess.pid
return new Promise((resolve, reject) => {
terminate(pid, function (err) {
@ -107,7 +107,7 @@ function unloadModel(): Promise<void> {
return tcpPortUsed
.waitUntilFree(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 5000)
.then(() => resolve())
-.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
+.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
.catch(() => {
killRequest()
})

View File

@ -45,6 +45,7 @@
--border: 20 5.9% 90%;
--input: 20 5.9% 90%;
--ring: 20 14.3% 4.1%;
+--scroll-bar: 60, 3%, 86%;
.primary-blue {
--primary: 221 83% 53%;

View File

@ -21,3 +21,38 @@
@apply bg-border relative z-50 w-[10px] rounded-full;
}
}
// Customized scroll bar
::-webkit-scrollbar {
width: 7px;
}
::-webkit-scrollbar-thumb {
background-color: hsl(var(--scroll-bar));
border-radius: 4px;
}
::-webkit-scrollbar-track {
background-color: hsl(var(--background));
}
::-webkit-scrollbar-corner {
background-color: hsl(var(--background));
}
::-moz-scrollbar {
width: 7px;
}
::-moz-scrollbar-thumb {
background-color: hsl(var(--scroll-bar));
border-radius: 4px;
}
::-moz-scrollbar-track {
background-color: hsl(var(--background));
}
::-moz-scrollbar-corner {
background-color: hsl(var(--background));
}

View File

@ -25,6 +25,8 @@ import ImportModelOptionModal from '@/screens/Settings/ImportModelOptionModal'
import ImportingModelModal from '@/screens/Settings/ImportingModelModal'
import SelectingModelModal from '@/screens/Settings/SelectingModelModal'
+import LoadingModal from '../LoadingModal'
import MainViewContainer from '../MainViewContainer'
import InstallingExtensionModal from './BottomBar/InstallingExtension/InstallingExtensionModal'
@ -69,6 +71,7 @@ const BaseLayout = () => {
<BottomBar />
</div>
</div>
+<LoadingModal />
{importModelStage === 'SELECTING_MODEL' && <SelectingModelModal />}
{importModelStage === 'MODEL_SELECTED' && <ImportModelOptionModal />}
{importModelStage === 'IMPORTING_MODEL' && <ImportingModelModal />}

View File

@ -1,4 +1,4 @@
-import { ReactNode, useEffect, useRef } from 'react'
+import { ReactNode, useCallback, useEffect, useRef } from 'react'
type Props = {
children: ReactNode
@ -6,20 +6,44 @@ type Props = {
const ListContainer: React.FC<Props> = ({ children }) => {
const listRef = useRef<HTMLDivElement>(null)
+const prevScrollTop = useRef(0)
+const isUserManuallyScrollingUp = useRef(false)
+const handleScroll = useCallback((event: React.UIEvent<HTMLElement>) => {
+const currentScrollTop = event.currentTarget.scrollTop
+if (prevScrollTop.current > currentScrollTop) {
+console.debug('User is manually scrolling up')
+isUserManuallyScrollingUp.current = true
+} else {
+const currentScrollTop = event.currentTarget.scrollTop
+const scrollHeight = event.currentTarget.scrollHeight
+const clientHeight = event.currentTarget.clientHeight
+if (currentScrollTop + clientHeight >= scrollHeight) {
+console.debug('Scrolled to the bottom')
+isUserManuallyScrollingUp.current = false
+}
+}
+prevScrollTop.current = currentScrollTop
+}, [])
useEffect(() => {
+if (isUserManuallyScrollingUp.current === true) return
const scrollHeight = listRef.current?.scrollHeight ?? 0
listRef.current?.scrollTo({
top: scrollHeight,
-behavior: 'smooth',
+behavior: 'instant',
})
-})
+}, [listRef.current?.scrollHeight, isUserManuallyScrollingUp])
return (
<div
ref={listRef}
className="flex h-full w-full flex-col overflow-y-scroll"
+onScroll={handleScroll}
>
{children}
</div>

View File

@ -0,0 +1,26 @@
import { Modal, ModalContent, ModalHeader, ModalTitle } from '@janhq/uikit'
import { atom, useAtomValue } from 'jotai'
export type LoadingInfo = {
title: string
message: string
}
export const loadingModalInfoAtom = atom<LoadingInfo | undefined>(undefined)
const ResettingModal: React.FC = () => {
const loadingInfo = useAtomValue(loadingModalInfoAtom)
return (
<Modal open={loadingInfo != null}>
<ModalContent>
<ModalHeader>
<ModalTitle>{loadingInfo?.title}</ModalTitle>
</ModalHeader>
<p className="text-muted-foreground">{loadingInfo?.message}</p>
</ModalContent>
</Modal>
)
}
export default ResettingModal

View File

@ -0,0 +1,101 @@
import { Fragment, ReactNode } from 'react'
import { useSetAtom } from 'jotai'
import { useDebouncedCallback } from 'use-debounce'
import { useGetHFRepoData } from '@/hooks/useGetHFRepoData'
import { loadingModalInfoAtom } from '../LoadingModal'
import { toaster } from '../Toast'
import {
importHuggingFaceModelStageAtom,
importingHuggingFaceRepoDataAtom,
} from '@/helpers/atoms/HuggingFace.atom'
type Props = {
children: ReactNode
}
const DeepLinkListener: React.FC<Props> = ({ children }) => {
const { getHfRepoData } = useGetHFRepoData()
const setLoadingInfo = useSetAtom(loadingModalInfoAtom)
const setImportingHuggingFaceRepoData = useSetAtom(
importingHuggingFaceRepoDataAtom
)
const setImportHuggingFaceModelStage = useSetAtom(
importHuggingFaceModelStageAtom
)
const handleDeepLinkAction = useDebouncedCallback(
async (deepLinkAction: DeepLinkAction) => {
if (
deepLinkAction.action !== 'models' ||
deepLinkAction.provider !== 'huggingface'
) {
console.error(
`Invalid deeplink action (${deepLinkAction.action}) or provider (${deepLinkAction.provider})`
)
return
}
try {
setLoadingInfo({
title: 'Getting Hugging Face models',
message: 'Please wait..',
})
const data = await getHfRepoData(deepLinkAction.resource)
setImportingHuggingFaceRepoData(data)
setImportHuggingFaceModelStage('REPO_DETAIL')
setLoadingInfo(undefined)
} catch (err) {
setLoadingInfo(undefined)
toaster({
title: 'Failed to get Hugging Face models',
description: err instanceof Error ? err.message : 'Unexpected Error',
type: 'error',
})
console.error(err)
}
},
300
)
window.electronAPI?.onDeepLink((_event: string, input: string) => {
window.core?.api?.ackDeepLink()
const action = deeplinkParser(input)
if (!action) return
handleDeepLinkAction(action)
})
return <Fragment>{children}</Fragment>
}
type DeepLinkAction = {
action: string
provider: string
resource: string
}
const deeplinkParser = (
deepLink: string | undefined
): DeepLinkAction | undefined => {
if (!deepLink) return undefined
try {
const url = new URL(deepLink)
const params = url.pathname.split('/').filter((str) => str.length > 0)
if (params.length < 3) return undefined
const action = params[0]
const provider = params[1]
const resource = params.slice(2).join('/')
return { action, provider, resource }
} catch (err) {
console.error(err)
return undefined
}
}
export default DeepLinkListener

View File

@ -22,6 +22,7 @@ import Loader from '../Loader'
import DataLoader from './DataLoader'
+import DeepLinkListener from './DeepLinkListener'
import KeyListener from './KeyListener'
import { extensionManager } from '@/extension'
@ -78,7 +79,9 @@ const Providers = ({ children }: PropsWithChildren) => {
<KeyListener>
<EventListenerWrapper>
<TooltipProvider delayDuration={0}>
-<DataLoader>{children}</DataLoader>
+<DataLoader>
+<DeepLinkListener>{children}</DeepLinkListener>
+</DataLoader>
</TooltipProvider>
</EventListenerWrapper>
<Toaster />

View File

@ -99,6 +99,11 @@ export const useCreateNewThread = () => {
? { ctx_len: 2048 }
: {}
+const overriddenParameters =
+defaultModel?.parameters.max_tokens && defaultModel.parameters.max_tokens
+? { max_tokens: 2048 }
+: {}
const createdAt = Date.now()
const assistantInfo: ThreadAssistantInfo = {
assistant_id: assistant.id,
@ -107,7 +112,8 @@
model: {
id: defaultModel?.id ?? '*',
settings: { ...defaultModel?.settings, ...overriddenSettings } ?? {},
-parameters: defaultModel?.parameters ?? {},
+parameters:
+{ ...defaultModel?.parameters, ...overriddenParameters } ?? {},
engine: defaultModel?.engine,
},
instructions: assistant.instructions,

View File

@ -22,8 +22,8 @@ const ChatBody: React.FC = () => {
const downloadedModels = useAtomValue(downloadedModelsAtom)
const loadModelError = useAtomValue(loadModelErrorAtom)
-if (downloadedModels.length === 0) return <EmptyModel />
+if (!downloadedModels.length) return <EmptyModel />
-if (messages.length === 0) return <EmptyThread />
+if (!messages.length) return <EmptyThread />
return (
<ListContainer>

View File

@ -129,12 +129,10 @@ const EditChatInput: React.FC<Props> = ({ message }) => {
}
return (
-<div className="mx-auto flex w-full flex-shrink-0 items-end justify-center space-x-4 pb-0 pt-1">
+<div className="mx-auto flex w-full flex-shrink-0 flex-col items-start justify-center space-y-4 pb-0 pt-1">
<div className="relative flex w-full flex-col">
<Textarea
-className={twMerge(
-'max-h-[400px] resize-none overflow-y-hidden pr-20'
-)}
+className={twMerge('max-h-[400px] resize-none pr-20')}
style={{ height: '40px' }}
ref={textareaRef}
onKeyDown={onKeyDown}

View File

@ -3,12 +3,17 @@ import {
InputComponentProps,
CheckboxComponentProps,
SliderComponentProps,
+InferenceEngine,
} from '@janhq/core'
+import { useAtomValue } from 'jotai/react'
import Checkbox from '@/containers/Checkbox'
import ModelConfigInput from '@/containers/ModelConfigInput'
import SliderRightPanel from '@/containers/SliderRightPanel'
+import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
type Props = {
componentProps: SettingComponentProps[]
disabled?: boolean
@ -20,6 +25,7 @@ const SettingComponent: React.FC<Props> = ({
disabled = false,
onValueUpdated,
}) => {
+const activeThread = useAtomValue(activeThreadAtom)
const components = componentProps.map((data) => {
switch (data.controllerType) {
case 'slider': {
@ -31,7 +37,16 @@ const SettingComponent: React.FC<Props> = ({
title={data.title}
description={data.description}
min={min}
-max={max}
+max={
+data.key === 'max_tokens' &&
+activeThread &&
+activeThread.assistants[0].model.engine === InferenceEngine.nitro
+? Number(
+activeThread &&
+activeThread.assistants[0].model.settings.ctx_len
+)
+: max
+}
step={step}
value={value}
name={data.key}

View File

@ -33,7 +33,7 @@ export const presetConfiguration: Record<string, SettingComponentProps> = {
'The context length for model operations varies; the maximum depends on the specific model used.',
controllerType: 'slider',
controllerProps: {
-min: 0,
+min: 128,
max: 4096,
step: 128,
value: 2048,

View File

@ -118,6 +118,32 @@ const Sidebar: React.FC = () => {
updateModelParameter(activeThread, {
params: { [key]: value },
})
+if (
+activeThread.assistants[0].model.parameters.max_tokens &&
+activeThread.assistants[0].model.settings.ctx_len
+) {
+if (
+key === 'max_tokens' &&
+Number(value) > activeThread.assistants[0].model.settings.ctx_len
+) {
+updateModelParameter(activeThread, {
+params: {
+max_tokens: activeThread.assistants[0].model.settings.ctx_len,
+},
+})
+}
+if (
+key === 'ctx_len' &&
+Number(value) < activeThread.assistants[0].model.parameters.max_tokens
+) {
+updateModelParameter(activeThread, {
+params: {
+max_tokens: activeThread.assistants[0].model.settings.ctx_len,
+},
+})
+}
+}
},
[activeThread, setEngineParamsUpdate, stopModel, updateModelParameter]
)