diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 000000000..f980b9df7 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,4 @@ +{ + "name": "jan", + "image": "node:20" +} \ No newline at end of file diff --git a/.github/workflows/jan-electron-build-nightly.yml b/.github/workflows/jan-electron-build-nightly.yml index 4531152d4..cad2ac227 100644 --- a/.github/workflows/jan-electron-build-nightly.yml +++ b/.github/workflows/jan-electron-build-nightly.yml @@ -8,7 +8,7 @@ on: - 'README.md' - 'docs/**' schedule: - - cron: '0 20 * * 2,3,4' # At 8 PM UTC on Tuesday, Wednesday, and Thursday, which is 3 AM UTC+7 + - cron: '0 20 * * 1,2,3' # At 8 PM UTC on Monday, Tuesday, and Wednesday which is 3 AM UTC+7 Tuesday, Wednesday, and Thursday workflow_dispatch: inputs: public_provider: diff --git a/.github/workflows/template-build-linux-x64.yml b/.github/workflows/template-build-linux-x64.yml index c6d1eac97..08cb1dada 100644 --- a/.github/workflows/template-build-linux-x64.yml +++ b/.github/workflows/template-build-linux-x64.yml @@ -98,8 +98,8 @@ jobs: make build-and-publish env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - ANALYTICS_ID: ${{ secrets.JAN_APP_POSTHOG_PROJECT_API_KEY }} - ANALYTICS_HOST: ${{ secrets.JAN_APP_POSTHOG_URL }} + ANALYTICS_ID: ${{ secrets.JAN_APP_UMAMI_PROJECT_API_KEY }} + ANALYTICS_HOST: ${{ secrets.JAN_APP_UMAMI_URL }} - name: Upload Artifact .deb file if: inputs.public_provider != 'github' diff --git a/.github/workflows/template-build-macos.yml b/.github/workflows/template-build-macos.yml index bc48e6c21..0ad1d3a6a 100644 --- a/.github/workflows/template-build-macos.yml +++ b/.github/workflows/template-build-macos.yml @@ -137,8 +137,8 @@ jobs: APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }} APP_PATH: "." 
DEVELOPER_ID: ${{ secrets.DEVELOPER_ID }} - ANALYTICS_ID: ${{ secrets.JAN_APP_POSTHOG_PROJECT_API_KEY }} - ANALYTICS_HOST: ${{ secrets.JAN_APP_POSTHOG_URL }} + ANALYTICS_ID: ${{ secrets.JAN_APP_UMAMI_PROJECT_API_KEY }} + ANALYTICS_HOST: ${{ secrets.JAN_APP_UMAMI_URL }} - name: Upload Artifact if: inputs.public_provider != 'github' diff --git a/.github/workflows/template-build-windows-x64.yml b/.github/workflows/template-build-windows-x64.yml index 5d96b3f49..b81997bde 100644 --- a/.github/workflows/template-build-windows-x64.yml +++ b/.github/workflows/template-build-windows-x64.yml @@ -127,8 +127,8 @@ jobs: make build-and-publish env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - ANALYTICS_ID: ${{ secrets.JAN_APP_POSTHOG_PROJECT_API_KEY }} - ANALYTICS_HOST: ${{ secrets.JAN_APP_POSTHOG_URL }} + ANALYTICS_ID: ${{ secrets.JAN_APP_UMAMI_PROJECT_API_KEY }} + ANALYTICS_HOST: ${{ secrets.JAN_APP_UMAMI_URL }} AZURE_KEY_VAULT_URI: ${{ secrets.AZURE_KEY_VAULT_URI }} AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }} diff --git a/.github/workflows/update-release-url.yml b/.github/workflows/update-release-url.yml index 545d6542e..99a3db0e0 100644 --- a/.github/workflows/update-release-url.yml +++ b/.github/workflows/update-release-url.yml @@ -17,7 +17,7 @@ jobs: with: fetch-depth: "0" token: ${{ secrets.PAT_SERVICE_ACCOUNT }} - ref: main + ref: dev - name: Get Latest Release uses: pozetroninc/github-action-get-latest-release@v0.7.0 @@ -46,4 +46,4 @@ jobs: git config --global user.name "Service Account" git add README.md git commit -m "Update README.md with Stable Download URLs" - git -c http.extraheader="AUTHORIZATION: bearer ${{ secrets.PAT_SERVICE_ACCOUNT }}" push origin HEAD:main + git -c http.extraheader="AUTHORIZATION: bearer ${{ secrets.PAT_SERVICE_ACCOUNT }}" push origin HEAD:dev diff --git a/.gitignore b/.gitignore index e3e4635fc..4540e5c7a 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,8 @@ build electron/renderer electron/models electron/docs +electron/engines +server/pre-install package-lock.json *.log @@ -26,3 +28,4 @@ extensions/inference-nitro-extension/bin/*/*.exp extensions/inference-nitro-extension/bin/*/*.lib extensions/inference-nitro-extension/bin/saved-* extensions/inference-nitro-extension/bin/*.tar.gz + diff --git a/README.md b/README.md index 3a99407f5..34eecc9f3 100644 --- a/README.md +++ b/README.md @@ -76,31 +76,31 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute Experimental (Nightly Build) - + jan.exe - + Intel - + M1/M2 - + jan.deb - + jan.AppImage diff --git a/core/.prettierignore b/core/.prettierignore new file mode 100644 index 000000000..02d9145c1 --- /dev/null +++ b/core/.prettierignore @@ -0,0 +1,5 @@ +.next/ +node_modules/ +dist/ +*.hbs +*.mdx \ No newline at end of file diff --git a/core/src/api/index.ts b/core/src/api/index.ts index a3d0361e7..0d7cc51f7 100644 --- a/core/src/api/index.ts +++ b/core/src/api/index.ts @@ -3,7 +3,6 @@ * @description Enum of all the routes exposed by the app */ export enum AppRoute { - appDataPath = 'appDataPath', openExternalUrl = 'openExternalUrl', openAppDirectory = 'openAppDirectory', openFileExplore = 'openFileExplorer', @@ -12,6 +11,7 @@ export enum AppRoute { updateAppConfiguration = 'updateAppConfiguration', relaunch = 'relaunch', joinPath = 'joinPath', + isSubdirectory = 'isSubdirectory', baseName = 'baseName', startServer = 'startServer', stopServer = 'stopServer', @@ -61,7 +61,9 @@ export enum FileManagerRoute { syncFile = 'syncFile', 
getJanDataFolderPath = 'getJanDataFolderPath', getResourcePath = 'getResourcePath', + getUserHomePath = 'getUserHomePath', fileStat = 'fileStat', + writeBlob = 'writeBlob', } export type ApiFunction = (...args: any[]) => any diff --git a/core/src/core.ts b/core/src/core.ts index aa545e10e..8831c6001 100644 --- a/core/src/core.ts +++ b/core/src/core.ts @@ -22,7 +22,11 @@ const executeOnMain: (extension: string, method: string, ...args: any[]) => Prom * @param {object} network - Optional object to specify proxy/whether to ignore SSL certificates. * @returns {Promise} A promise that resolves when the file is downloaded. */ -const downloadFile: (url: string, fileName: string, network?: { proxy?: string, ignoreSSL?: boolean }) => Promise = (url, fileName, network) => { +const downloadFile: ( + url: string, + fileName: string, + network?: { proxy?: string; ignoreSSL?: boolean } +) => Promise = (url, fileName, network) => { return global.core?.api?.downloadFile(url, fileName, network) } @@ -79,6 +83,12 @@ const openExternalUrl: (url: string) => Promise = (url) => */ const getResourcePath: () => Promise = () => global.core.api?.getResourcePath() +/** + * Gets the user's home path. + * @returns return user's home path + */ +const getUserHomePath = (): Promise => global.core.api?.getUserHomePath() + /** * Log to file from browser processes. * @@ -87,6 +97,17 @@ const getResourcePath: () => Promise = () => global.core.api?.getResourc const log: (message: string, fileName?: string) => void = (message, fileName) => global.core.api?.log(message, fileName) +/** + * Check whether the path is a subdirectory of another path. + * + * @param from - The path to check. + * @param to - The path to check against. + * + * @returns {Promise} - A promise that resolves with a boolean indicating whether the path is a subdirectory. + */ +const isSubdirectory: (from: string, to: string) => Promise = (from: string, to: string) => + global.core.api?.isSubdirectory(from, to) + /** * Register extension point function type definition */ @@ -94,7 +115,7 @@ export type RegisterExtensionPoint = ( extensionName: string, extensionId: string, method: Function, - priority?: number, + priority?: number ) => void /** @@ -111,5 +132,7 @@ export { openExternalUrl, baseName, log, + isSubdirectory, + getUserHomePath, FileStat, } diff --git a/core/src/fs.ts b/core/src/fs.ts index ea636977a..0e570d1f5 100644 --- a/core/src/fs.ts +++ b/core/src/fs.ts @@ -1,4 +1,4 @@ -import { FileStat } from "./types" +import { FileStat } from './types' /** * Writes data to a file at the specified path. @@ -6,6 +6,15 @@ import { FileStat } from "./types" */ const writeFileSync = (...args: any[]) => global.core.api?.writeFileSync(...args) +/** + * Writes blob data to a file at the specified path. + * @param path - The path to file. + * @param data - The blob data. + * @returns + */ +const writeBlob: (path: string, data: string) => Promise = (path, data) => + global.core.api?.writeBlob(path, data) + /** * Reads the contents of a file at the specified path. * @returns {Promise} A Promise that resolves with the contents of the file. @@ -60,7 +69,6 @@ const syncFile: (src: string, dest: string) => Promise = (src, dest) => */ const copyFileSync = (...args: any[]) => global.core.api?.copyFileSync(...args) - /** * Gets the file's stats. 
* @@ -70,7 +78,6 @@ const copyFileSync = (...args: any[]) => global.core.api?.copyFileSync(...args) const fileStat: (path: string) => Promise = (path) => global.core.api?.fileStat(path) - // TODO: Export `dummy` fs functions automatically // Currently adding these manually export const fs = { @@ -84,5 +91,6 @@ export const fs = { appendFileSync, copyFileSync, syncFile, - fileStat + fileStat, + writeBlob, } diff --git a/core/src/node/api/common/builder.ts b/core/src/node/api/common/builder.ts index 14946f415..5c99cf4d8 100644 --- a/core/src/node/api/common/builder.ts +++ b/core/src/node/api/common/builder.ts @@ -2,7 +2,8 @@ import fs from 'fs' import { JanApiRouteConfiguration, RouteConfiguration } from './configuration' import { join } from 'path' import { ContentType, MessageStatus, Model, ThreadMessage } from './../../../index' -import { getJanDataFolderPath } from '../../utils' +import { getEngineConfiguration, getJanDataFolderPath } from '../../utils' +import { DEFAULT_CHAT_COMPLETION_URL } from './consts' export const getBuilder = async (configuration: RouteConfiguration) => { const directoryPath = join(getJanDataFolderPath(), configuration.dirName) @@ -265,19 +266,22 @@ export const downloadModel = async ( const modelBinaryPath = join(directoryPath, modelId) const request = require('request') - const rq = request({ url: model.source_url, strictSSL, proxy }) const progress = require('request-progress') - progress(rq, {}) - .on('progress', function (state: any) { - console.log('progress', JSON.stringify(state, null, 2)) - }) - .on('error', function (err: Error) { - console.error('error', err) - }) - .on('end', function () { - console.log('end') - }) - .pipe(fs.createWriteStream(modelBinaryPath)) + + for (const source of model.sources) { + const rq = request({ url: source, strictSSL, proxy }) + progress(rq, {}) + .on('progress', function (state: any) { + console.debug('progress', JSON.stringify(state, null, 2)) + }) + .on('error', function (err: Error) { + console.error('error', err) + }) + .on('end', function () { + console.debug('end') + }) + .pipe(fs.createWriteStream(modelBinaryPath)) + } return { message: `Starting download ${modelId}`, @@ -306,7 +310,7 @@ export const chatCompletions = async (request: any, reply: any) => { const engineConfiguration = await getEngineConfiguration(requestedModel.engine) let apiKey: string | undefined = undefined - let apiUrl: string = 'http://127.0.0.1:3928/inferences/llamacpp/chat_completion' // default nitro url + let apiUrl: string = DEFAULT_CHAT_COMPLETION_URL if (engineConfiguration) { apiKey = engineConfiguration.api_key @@ -317,7 +321,7 @@ export const chatCompletions = async (request: any, reply: any) => { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', 'Connection': 'keep-alive', - "Access-Control-Allow-Origin": "*" + 'Access-Control-Allow-Origin': '*', }) const headers: Record = { @@ -343,13 +347,3 @@ export const chatCompletions = async (request: any, reply: any) => { response.body.pipe(reply.raw) } } - -const getEngineConfiguration = async (engineId: string) => { - if (engineId !== 'openai') { - return undefined - } - const directoryPath = join(getJanDataFolderPath(), 'engines') - const filePath = join(directoryPath, `${engineId}.json`) - const data = await fs.readFileSync(filePath, 'utf-8') - return JSON.parse(data) -} diff --git a/core/src/node/api/common/consts.ts b/core/src/node/api/common/consts.ts new file mode 100644 index 000000000..bc3cfe300 --- /dev/null +++ b/core/src/node/api/common/consts.ts @@ -0,0 +1,19 
@@ +// The PORT to use for the Nitro subprocess +export const NITRO_DEFAULT_PORT = 3928 + +// The HOST address to use for the Nitro subprocess +export const LOCAL_HOST = '127.0.0.1' + +export const SUPPORTED_MODEL_FORMAT = '.gguf' + +// The URL for the Nitro subprocess +const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}` +// The URL for the Nitro subprocess to load a model +export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel` +// The URL for the Nitro subprocess to validate a model +export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus` + +// The URL for the Nitro subprocess to kill itself +export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy` + +export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/llamacpp/chat_completion` // default nitro url diff --git a/core/src/node/api/common/startStopModel.ts b/core/src/node/api/common/startStopModel.ts new file mode 100644 index 000000000..0d4934e1c --- /dev/null +++ b/core/src/node/api/common/startStopModel.ts @@ -0,0 +1,351 @@ +import fs from 'fs' +import { join } from 'path' +import { getJanDataFolderPath, getJanExtensionsPath, getSystemResourceInfo } from '../../utils' +import { logServer } from '../../log' +import { ChildProcessWithoutNullStreams, spawn } from 'child_process' +import { Model, ModelSettingParams, PromptTemplate } from '../../../types' +import { + LOCAL_HOST, + NITRO_DEFAULT_PORT, + NITRO_HTTP_KILL_URL, + NITRO_HTTP_LOAD_MODEL_URL, + NITRO_HTTP_VALIDATE_MODEL_URL, + SUPPORTED_MODEL_FORMAT, +} from './consts' + +// The subprocess instance for Nitro +let subprocess: ChildProcessWithoutNullStreams | undefined = undefined + +// TODO: move this to core type +interface NitroModelSettings extends ModelSettingParams { + llama_model_path: string + cpu_threads: number +} + +export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => { + try { + await runModel(modelId, settingParams) + + return { + message: `Model ${modelId} started`, + } + } catch (e) { + return { + error: e, + } + } +} + +const runModel = async (modelId: string, settingParams?: ModelSettingParams): Promise => { + const janDataFolderPath = getJanDataFolderPath() + const modelFolderFullPath = join(janDataFolderPath, 'models', modelId) + + if (!fs.existsSync(modelFolderFullPath)) { + throw `Model not found: ${modelId}` + } + + const files: string[] = fs.readdirSync(modelFolderFullPath) + + // Look for GGUF model file + const ggufBinFile = files.find((file) => file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)) + + const modelMetadataPath = join(modelFolderFullPath, 'model.json') + const modelMetadata: Model = JSON.parse(fs.readFileSync(modelMetadataPath, 'utf-8')) + + if (!ggufBinFile) { + throw 'No GGUF model file found' + } + const modelBinaryPath = join(modelFolderFullPath, ggufBinFile) + + const nitroResourceProbe = await getSystemResourceInfo() + const nitroModelSettings: NitroModelSettings = { + ...modelMetadata.settings, + ...settingParams, + llama_model_path: modelBinaryPath, + // This is critical and requires real CPU physical core count (or performance core) + cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore), + ...(modelMetadata.settings.mmproj && { + mmproj: join(modelFolderFullPath, modelMetadata.settings.mmproj), + }), + } + + logServer(`[NITRO]::Debug: Nitro model settings: ${JSON.stringify(nitroModelSettings)}`) + + // 
Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt + if (modelMetadata.settings.prompt_template) { + const promptTemplate = modelMetadata.settings.prompt_template + const prompt = promptTemplateConverter(promptTemplate) + if (prompt?.error) { + return Promise.reject(prompt.error) + } + nitroModelSettings.system_prompt = prompt.system_prompt + nitroModelSettings.user_prompt = prompt.user_prompt + nitroModelSettings.ai_prompt = prompt.ai_prompt + } + + await runNitroAndLoadModel(modelId, nitroModelSettings) +} + +// TODO: move to util +const promptTemplateConverter = (promptTemplate: string): PromptTemplate => { + // Split the string using the markers + const systemMarker = '{system_message}' + const promptMarker = '{prompt}' + + if (promptTemplate.includes(systemMarker) && promptTemplate.includes(promptMarker)) { + // Find the indices of the markers + const systemIndex = promptTemplate.indexOf(systemMarker) + const promptIndex = promptTemplate.indexOf(promptMarker) + + // Extract the parts of the string + const system_prompt = promptTemplate.substring(0, systemIndex) + const user_prompt = promptTemplate.substring(systemIndex + systemMarker.length, promptIndex) + const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length) + + // Return the split parts + return { system_prompt, user_prompt, ai_prompt } + } else if (promptTemplate.includes(promptMarker)) { + // Extract the parts of the string for the case where only promptMarker is present + const promptIndex = promptTemplate.indexOf(promptMarker) + const user_prompt = promptTemplate.substring(0, promptIndex) + const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length) + + // Return the split parts + return { user_prompt, ai_prompt } + } + + // Return an error if none of the conditions are met + return { error: 'Cannot split prompt template' } +} + +const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSettings) => { + // Gather system information for CPU physical cores and memory + const tcpPortUsed = require('tcp-port-used') + + await stopModel(modelId) + await tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000) + + /** + * There is a problem with Windows process manager + * Should wait for awhile to make sure the port is free and subprocess is killed + * The tested threshold is 500ms + **/ + if (process.platform === 'win32') { + await new Promise((resolve) => setTimeout(resolve, 500)) + } + + await spawnNitroProcess() + await loadLLMModel(modelSettings) + await validateModelStatus() +} + +const spawnNitroProcess = async (): Promise => { + logServer(`[NITRO]::Debug: Spawning Nitro subprocess...`) + + let binaryFolder = join( + getJanExtensionsPath(), + '@janhq', + 'inference-nitro-extension', + 'dist', + 'bin' + ) + + let executableOptions = executableNitroFile() + const tcpPortUsed = require('tcp-port-used') + + const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()] + // Execute the binary + logServer( + `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}` + ) + subprocess = spawn( + executableOptions.executablePath, + ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()], + { + cwd: binaryFolder, + env: { + ...process.env, + CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices, + }, + } + ) + + // Handle subprocess output + subprocess.stdout.on('data', (data: any) => { + logServer(`[NITRO]::Debug: ${data}`) + }) + + subprocess.stderr.on('data', (data: any) => { + logServer(`[NITRO]::Error: 
${data}`) + }) + + subprocess.on('close', (code: any) => { + logServer(`[NITRO]::Debug: Nitro exited with code: ${code}`) + subprocess = undefined + }) + + tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => { + logServer(`[NITRO]::Debug: Nitro is ready`) + }) +} + +type NitroExecutableOptions = { + executablePath: string + cudaVisibleDevices: string +} + +const executableNitroFile = (): NitroExecutableOptions => { + const nvidiaInfoFilePath = join(getJanDataFolderPath(), 'settings', 'settings.json') + let binaryFolder = join( + getJanExtensionsPath(), + '@janhq', + 'inference-nitro-extension', + 'dist', + 'bin' + ) + + let cudaVisibleDevices = '' + let binaryName = 'nitro' + /** + * The binary folder is different for each platform. + */ + if (process.platform === 'win32') { + /** + * For Windows: win-cpu, win-cuda-11-7, win-cuda-12-0 + */ + let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8')) + if (nvidiaInfo['run_mode'] === 'cpu') { + binaryFolder = join(binaryFolder, 'win-cpu') + } else { + if (nvidiaInfo['cuda'].version === '12') { + binaryFolder = join(binaryFolder, 'win-cuda-12-0') + } else { + binaryFolder = join(binaryFolder, 'win-cuda-11-7') + } + cudaVisibleDevices = nvidiaInfo['gpu_highest_vram'] + } + binaryName = 'nitro.exe' + } else if (process.platform === 'darwin') { + /** + * For MacOS: mac-arm64 (Silicon), mac-x64 (InteL) + */ + if (process.arch === 'arm64') { + binaryFolder = join(binaryFolder, 'mac-arm64') + } else { + binaryFolder = join(binaryFolder, 'mac-x64') + } + } else { + /** + * For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0 + */ + let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8')) + if (nvidiaInfo['run_mode'] === 'cpu') { + binaryFolder = join(binaryFolder, 'linux-cpu') + } else { + if (nvidiaInfo['cuda'].version === '12') { + binaryFolder = join(binaryFolder, 'linux-cuda-12-0') + } else { + binaryFolder = join(binaryFolder, 'linux-cuda-11-7') + } + cudaVisibleDevices = nvidiaInfo['gpu_highest_vram'] + } + } + + return { + executablePath: join(binaryFolder, binaryName), + cudaVisibleDevices, + } +} + +const validateModelStatus = async (): Promise => { + // Send a GET request to the validation URL. + // Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries. + const fetchRT = require('fetch-retry') + const fetchRetry = fetchRT(fetch) + + return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + }, + retries: 5, + retryDelay: 500, + }).then(async (res: Response) => { + logServer(`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(res)}`) + // If the response is OK, check model_loaded status. + if (res.ok) { + const body = await res.json() + // If the model is loaded, return an empty object. + // Otherwise, return an object with an error message. 
+ if (body.model_loaded) { + return Promise.resolve() + } + } + return Promise.reject('Validate model status failed') + }) +} + +const loadLLMModel = async (settings: NitroModelSettings): Promise => { + logServer(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`) + const fetchRT = require('fetch-retry') + const fetchRetry = fetchRT(fetch) + + return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify(settings), + retries: 3, + retryDelay: 500, + }) + .then((res: any) => { + logServer(`[NITRO]::Debug: Load model success with response ${JSON.stringify(res)}`) + return Promise.resolve(res) + }) + .catch((err: any) => { + logServer(`[NITRO]::Error: Load model failed with error ${err}`) + return Promise.reject(err) + }) +} + +/** + * Stop model and kill nitro process. + */ +export const stopModel = async (_modelId: string) => { + if (!subprocess) { + return { + error: "Model isn't running", + } + } + return new Promise((resolve, reject) => { + const controller = new AbortController() + setTimeout(() => { + controller.abort() + reject({ + error: 'Failed to stop model: Timedout', + }) + }, 5000) + const tcpPortUsed = require('tcp-port-used') + logServer(`[NITRO]::Debug: Request to kill Nitro`) + + fetch(NITRO_HTTP_KILL_URL, { + method: 'DELETE', + signal: controller.signal, + }) + .then(() => { + subprocess?.kill() + subprocess = undefined + }) + .catch(() => { + // don't need to do anything, we still kill the subprocess + }) + .then(() => tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000)) + .then(() => logServer(`[NITRO]::Debug: Nitro process is terminated`)) + .then(() => + resolve({ + message: 'Model stopped', + }) + ) + }) +} diff --git a/core/src/node/api/routes/common.ts b/core/src/node/api/routes/common.ts index a6c65a382..27385e561 100644 --- a/core/src/node/api/routes/common.ts +++ b/core/src/node/api/routes/common.ts @@ -10,6 +10,8 @@ import { } from '../common/builder' import { JanApiRouteConfiguration } from '../common/configuration' +import { startModel, stopModel } from '../common/startStopModel' +import { ModelSettingParams } from '../../../types' export const commonRouter = async (app: HttpServer) => { // Common Routes @@ -17,19 +19,33 @@ export const commonRouter = async (app: HttpServer) => { app.get(`/${key}`, async (_request) => getBuilder(JanApiRouteConfiguration[key])) app.get(`/${key}/:id`, async (request: any) => - retrieveBuilder(JanApiRouteConfiguration[key], request.params.id), + retrieveBuilder(JanApiRouteConfiguration[key], request.params.id) ) app.delete(`/${key}/:id`, async (request: any) => - deleteBuilder(JanApiRouteConfiguration[key], request.params.id), + deleteBuilder(JanApiRouteConfiguration[key], request.params.id) ) }) // Download Model Routes app.get(`/models/download/:modelId`, async (request: any) => - downloadModel(request.params.modelId, { ignoreSSL: request.query.ignoreSSL === 'true', proxy: request.query.proxy }), + downloadModel(request.params.modelId, { + ignoreSSL: request.query.ignoreSSL === 'true', + proxy: request.query.proxy, + }) ) + app.put(`/models/:modelId/start`, async (request: any) => { + let settingParams: ModelSettingParams | undefined = undefined + if (Object.keys(request.body).length !== 0) { + settingParams = JSON.parse(request.body) as ModelSettingParams + } + + return startModel(request.params.modelId, settingParams) + }) + + app.put(`/models/:modelId/stop`, async (request: any) => stopModel(request.params.modelId)) + 
// Chat Completion Routes app.post(`/chat/completions`, async (request: any, reply: any) => chatCompletions(request, reply)) diff --git a/core/src/node/api/routes/fileManager.ts b/core/src/node/api/routes/fileManager.ts index 159c23a0c..66056444e 100644 --- a/core/src/node/api/routes/fileManager.ts +++ b/core/src/node/api/routes/fileManager.ts @@ -8,5 +8,7 @@ export const fsRouter = async (app: HttpServer) => { app.post(`/app/${FileManagerRoute.getResourcePath}`, async (request: any, reply: any) => {}) + app.post(`/app/${FileManagerRoute.getUserHomePath}`, async (request: any, reply: any) => {}) + app.post(`/app/${FileManagerRoute.fileStat}`, async (request: any, reply: any) => {}) } diff --git a/core/src/node/api/routes/fs.ts b/core/src/node/api/routes/fs.ts index 5f511af27..c5404ccce 100644 --- a/core/src/node/api/routes/fs.ts +++ b/core/src/node/api/routes/fs.ts @@ -2,6 +2,7 @@ import { FileSystemRoute } from '../../../api' import { join } from 'path' import { HttpServer } from '../HttpServer' import { getJanDataFolderPath } from '../../utils' +import { normalizeFilePath } from '../../path' export const fsRouter = async (app: HttpServer) => { const moduleName = 'fs' @@ -13,10 +14,10 @@ export const fsRouter = async (app: HttpServer) => { const result = await import(moduleName).then((mdl) => { return mdl[route]( ...body.map((arg: any) => - typeof arg === 'string' && arg.includes('file:/') - ? join(getJanDataFolderPath(), arg.replace('file:/', '')) - : arg, - ), + typeof arg === 'string' && (arg.startsWith(`file:/`) || arg.startsWith(`file:\\`)) + ? join(getJanDataFolderPath(), normalizeFilePath(arg)) + : arg + ) ) }) res.status(200).send(result) diff --git a/core/src/node/utils/index.ts b/core/src/node/utils/index.ts index 00db04c9b..4bcbf13b1 100644 --- a/core/src/node/utils/index.ts +++ b/core/src/node/utils/index.ts @@ -1,16 +1,18 @@ -import { AppConfiguration } from "../../types"; -import { join } from "path"; -import fs from "fs"; -import os from "os"; +import { AppConfiguration, SystemResourceInfo } from '../../types' +import { join } from 'path' +import fs from 'fs' +import os from 'os' +import { log, logServer } from '../log' +import childProcess from 'child_process' // TODO: move this to core -const configurationFileName = "settings.json"; +const configurationFileName = 'settings.json' // TODO: do no specify app name in framework module -const defaultJanDataFolder = join(os.homedir(), "jan"); +const defaultJanDataFolder = join(os.homedir(), 'jan') const defaultAppConfig: AppConfiguration = { data_folder: defaultJanDataFolder, -}; +} /** * Getting App Configurations. 
@@ -20,39 +22,39 @@ const defaultAppConfig: AppConfiguration = { export const getAppConfigurations = (): AppConfiguration => { // Retrieve Application Support folder path // Fallback to user home directory if not found - const configurationFile = getConfigurationFilePath(); + const configurationFile = getConfigurationFilePath() if (!fs.existsSync(configurationFile)) { // create default app config if we don't have one - console.debug(`App config not found, creating default config at ${configurationFile}`); - fs.writeFileSync(configurationFile, JSON.stringify(defaultAppConfig)); - return defaultAppConfig; + console.debug(`App config not found, creating default config at ${configurationFile}`) + fs.writeFileSync(configurationFile, JSON.stringify(defaultAppConfig)) + return defaultAppConfig } try { const appConfigurations: AppConfiguration = JSON.parse( - fs.readFileSync(configurationFile, "utf-8"), - ); - return appConfigurations; + fs.readFileSync(configurationFile, 'utf-8') + ) + return appConfigurations } catch (err) { - console.error(`Failed to read app config, return default config instead! Err: ${err}`); - return defaultAppConfig; + console.error(`Failed to read app config, return default config instead! Err: ${err}`) + return defaultAppConfig } -}; +} const getConfigurationFilePath = () => join( - global.core?.appPath() || process.env[process.platform == "win32" ? "USERPROFILE" : "HOME"], - configurationFileName, - ); + global.core?.appPath() || process.env[process.platform == 'win32' ? 'USERPROFILE' : 'HOME'], + configurationFileName + ) export const updateAppConfiguration = (configuration: AppConfiguration): Promise => { - const configurationFile = getConfigurationFilePath(); - console.debug("updateAppConfiguration, configurationFile: ", configurationFile); + const configurationFile = getConfigurationFilePath() + console.debug('updateAppConfiguration, configurationFile: ', configurationFile) - fs.writeFileSync(configurationFile, JSON.stringify(configuration)); - return Promise.resolve(); -}; + fs.writeFileSync(configurationFile, JSON.stringify(configuration)) + return Promise.resolve() +} /** * Utility function to get server log path @@ -60,13 +62,13 @@ export const updateAppConfiguration = (configuration: AppConfiguration): Promise * @returns {string} The log path. */ export const getServerLogPath = (): string => { - const appConfigurations = getAppConfigurations(); - const logFolderPath = join(appConfigurations.data_folder, "logs"); + const appConfigurations = getAppConfigurations() + const logFolderPath = join(appConfigurations.data_folder, 'logs') if (!fs.existsSync(logFolderPath)) { - fs.mkdirSync(logFolderPath, { recursive: true }); + fs.mkdirSync(logFolderPath, { recursive: true }) } - return join(logFolderPath, "server.log"); -}; + return join(logFolderPath, 'server.log') +} /** * Utility function to get app log path @@ -74,13 +76,13 @@ export const getServerLogPath = (): string => { * @returns {string} The log path. 
*/ export const getAppLogPath = (): string => { - const appConfigurations = getAppConfigurations(); - const logFolderPath = join(appConfigurations.data_folder, "logs"); + const appConfigurations = getAppConfigurations() + const logFolderPath = join(appConfigurations.data_folder, 'logs') if (!fs.existsSync(logFolderPath)) { - fs.mkdirSync(logFolderPath, { recursive: true }); + fs.mkdirSync(logFolderPath, { recursive: true }) } - return join(logFolderPath, "app.log"); -}; + return join(logFolderPath, 'app.log') +} /** * Utility function to get data folder path @@ -88,9 +90,9 @@ export const getAppLogPath = (): string => { * @returns {string} The data folder path. */ export const getJanDataFolderPath = (): string => { - const appConfigurations = getAppConfigurations(); - return appConfigurations.data_folder; -}; + const appConfigurations = getAppConfigurations() + return appConfigurations.data_folder +} /** * Utility function to get extension path @@ -98,6 +100,70 @@ export const getJanDataFolderPath = (): string => { * @returns {string} The extensions path. */ export const getJanExtensionsPath = (): string => { - const appConfigurations = getAppConfigurations(); - return join(appConfigurations.data_folder, "extensions"); -}; + const appConfigurations = getAppConfigurations() + return join(appConfigurations.data_folder, 'extensions') +} + +/** + * Utility function to physical cpu count + * + * @returns {number} The physical cpu count. + */ +export const physicalCpuCount = async (): Promise => { + const platform = os.platform() + if (platform === 'linux') { + const output = await exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l') + return parseInt(output.trim(), 10) + } else if (platform === 'darwin') { + const output = await exec('sysctl -n hw.physicalcpu_max') + return parseInt(output.trim(), 10) + } else if (platform === 'win32') { + const output = await exec('WMIC CPU Get NumberOfCores') + return output + .split(os.EOL) + .map((line: string) => parseInt(line)) + .filter((value: number) => !isNaN(value)) + .reduce((sum: number, number: number) => sum + number, 1) + } else { + const cores = os.cpus().filter((cpu: any, index: number) => { + const hasHyperthreading = cpu.model.includes('Intel') + const isOdd = index % 2 === 1 + return !hasHyperthreading || isOdd + }) + return cores.length + } +} + +const exec = async (command: string): Promise => { + return new Promise((resolve, reject) => { + childProcess.exec(command, { encoding: 'utf8' }, (error, stdout) => { + if (error) { + reject(error) + } else { + resolve(stdout) + } + }) + }) +} + +export const getSystemResourceInfo = async (): Promise => { + const cpu = await physicalCpuCount() + const message = `[NITRO]::CPU informations - ${cpu}` + log(message) + logServer(message) + + return { + numCpuPhysicalCore: cpu, + memAvailable: 0, // TODO: this should not be 0 + } +} + +export const getEngineConfiguration = async (engineId: string) => { + if (engineId !== 'openai') { + return undefined + } + const directoryPath = join(getJanDataFolderPath(), 'engines') + const filePath = join(directoryPath, `${engineId}.json`) + const data = fs.readFileSync(filePath, 'utf-8') + return JSON.parse(data) +} diff --git a/core/src/types/assistant/assistantEntity.ts b/core/src/types/assistant/assistantEntity.ts index 91bb2bb22..733dbea8d 100644 --- a/core/src/types/assistant/assistantEntity.ts +++ b/core/src/types/assistant/assistantEntity.ts @@ -2,6 +2,13 @@ * Assistant type defines the shape of an assistant object. 
* @stored */ + +export type AssistantTool = { + type: string + enabled: boolean + settings: any +} + export type Assistant = { /** Represents the avatar of the user. */ avatar: string @@ -22,7 +29,7 @@ export type Assistant = { /** Represents the instructions for the object. */ instructions?: string /** Represents the tools associated with the object. */ - tools?: any + tools?: AssistantTool[] /** Represents the file identifiers associated with the object. */ file_ids: string[] /** Represents the metadata of the object. */ diff --git a/core/src/types/index.ts b/core/src/types/index.ts index 3bdcb5421..ee6f4ef08 100644 --- a/core/src/types/index.ts +++ b/core/src/types/index.ts @@ -6,3 +6,4 @@ export * from './inference' export * from './monitoring' export * from './file' export * from './config' +export * from './miscellaneous' diff --git a/core/src/types/inference/inferenceEntity.ts b/core/src/types/inference/inferenceEntity.ts index 58b838ae7..c37e3b079 100644 --- a/core/src/types/inference/inferenceEntity.ts +++ b/core/src/types/inference/inferenceEntity.ts @@ -1,3 +1,5 @@ +import { ContentType, ContentValue } from '../message' + /** * The role of the author of this message. */ @@ -13,7 +15,32 @@ export enum ChatCompletionRole { */ export type ChatCompletionMessage = { /** The contents of the message. **/ - content?: string + content?: ChatCompletionMessageContent /** The role of the author of this message. **/ role: ChatCompletionRole } + +export type ChatCompletionMessageContent = + | string + | (ChatCompletionMessageContentText & + ChatCompletionMessageContentImage & + ChatCompletionMessageContentDoc)[] + +export enum ChatCompletionMessageContentType { + Text = 'text', + Image = 'image_url', + Doc = 'doc_url', +} + +export type ChatCompletionMessageContentText = { + type: ChatCompletionMessageContentType + text: string +} +export type ChatCompletionMessageContentImage = { + type: ChatCompletionMessageContentType + image_url: { url: string } +} +export type ChatCompletionMessageContentDoc = { + type: ChatCompletionMessageContentType + doc_url: { url: string } +} diff --git a/core/src/types/message/messageEntity.ts b/core/src/types/message/messageEntity.ts index 199743796..87e4b1997 100644 --- a/core/src/types/message/messageEntity.ts +++ b/core/src/types/message/messageEntity.ts @@ -1,5 +1,6 @@ import { ChatCompletionMessage, ChatCompletionRole } from '../inference' import { ModelInfo } from '../model' +import { Thread } from '../thread' /** * The `ThreadMessage` type defines the shape of a thread's message object. @@ -35,7 +36,10 @@ export type ThreadMessage = { export type MessageRequest = { id?: string - /** The thread id of the message request. **/ + /** + * @deprecated Use thread object instead + * The thread id of the message request. + */ threadId: string /** @@ -48,6 +52,10 @@ export type MessageRequest = { /** Settings for constructing a chat completion request **/ model?: ModelInfo + + /** The thread this message belongs to. **/ + // TODO: deprecate threadId field + thread?: Thread } /** @@ -62,7 +70,7 @@ export enum MessageStatus { /** Message loaded with error.
**/ Error = 'error', /** Message is cancelled streaming */ - Stopped = "stopped" + Stopped = 'stopped', } /** @@ -71,6 +79,7 @@ export enum MessageStatus { export enum ContentType { Text = 'text', Image = 'image', + Pdf = 'pdf', } /** @@ -80,6 +89,8 @@ export enum ContentType { export type ContentValue = { value: string annotations: string[] + name?: string + size?: number } /** diff --git a/core/src/types/miscellaneous/index.ts b/core/src/types/miscellaneous/index.ts new file mode 100644 index 000000000..02c973323 --- /dev/null +++ b/core/src/types/miscellaneous/index.ts @@ -0,0 +1,2 @@ +export * from './systemResourceInfo' +export * from './promptTemplate' diff --git a/core/src/types/miscellaneous/promptTemplate.ts b/core/src/types/miscellaneous/promptTemplate.ts new file mode 100644 index 000000000..a6743c67c --- /dev/null +++ b/core/src/types/miscellaneous/promptTemplate.ts @@ -0,0 +1,6 @@ +export type PromptTemplate = { + system_prompt?: string + ai_prompt?: string + user_prompt?: string + error?: string +} diff --git a/core/src/types/miscellaneous/systemResourceInfo.ts b/core/src/types/miscellaneous/systemResourceInfo.ts new file mode 100644 index 000000000..1472cda47 --- /dev/null +++ b/core/src/types/miscellaneous/systemResourceInfo.ts @@ -0,0 +1,4 @@ +export type SystemResourceInfo = { + numCpuPhysicalCore: number + memAvailable: number +} diff --git a/core/src/types/model/modelEntity.ts b/core/src/types/model/modelEntity.ts index 80adc9e96..644c34dfb 100644 --- a/core/src/types/model/modelEntity.ts +++ b/core/src/types/model/modelEntity.ts @@ -7,6 +7,7 @@ export type ModelInfo = { settings: ModelSettingParams parameters: ModelRuntimeParams engine?: InferenceEngine + proxyEngine?: InferenceEngine } /** @@ -18,7 +19,13 @@ export enum InferenceEngine { nitro = 'nitro', openai = 'openai', triton_trtllm = 'triton_trtllm', - hf_endpoint = 'hf_endpoint', + + tool_retrieval_enabled = 'tool_retrieval_enabled', +} + +export type ModelArtifact = { + filename: string + url: string } /** @@ -45,7 +52,7 @@ export type Model = { /** * The model download source. It can be an external url or a local filepath. */ - source_url: string + sources: ModelArtifact[] /** * The model identifier, which can be referenced in the API endpoints. @@ -85,6 +92,13 @@ export type Model = { * The model engine. */ engine: InferenceEngine + + proxyEngine?: InferenceEngine + + /** + * Is multimodal or not. 
+ */ + visionModel?: boolean } export type ModelMetadata = { @@ -107,6 +121,9 @@ export type ModelSettingParams = { system_prompt?: string ai_prompt?: string user_prompt?: string + llama_model_path?: string + mmproj?: string + cont_batching?: boolean } /** @@ -122,4 +139,5 @@ export type ModelRuntimeParams = { stop?: string[] frequency_penalty?: number presence_penalty?: number + engine?: string } diff --git a/core/src/types/thread/index.ts b/core/src/types/thread/index.ts index c6ff6204a..32155e1cd 100644 --- a/core/src/types/thread/index.ts +++ b/core/src/types/thread/index.ts @@ -1,2 +1,3 @@ export * from './threadEntity' export * from './threadInterface' +export * from './threadEvent' diff --git a/core/src/types/thread/threadEntity.ts b/core/src/types/thread/threadEntity.ts index 4ff3aa1fc..dd88b10ec 100644 --- a/core/src/types/thread/threadEntity.ts +++ b/core/src/types/thread/threadEntity.ts @@ -1,3 +1,4 @@ +import { AssistantTool } from '../assistant' import { ModelInfo } from '../model' /** @@ -30,6 +31,7 @@ export type ThreadAssistantInfo = { assistant_name: string model: ModelInfo instructions?: string + tools?: AssistantTool[] } /** @@ -41,5 +43,4 @@ export type ThreadState = { waitingForResponse: boolean error?: Error lastMessage?: string - isFinishInit?: boolean } diff --git a/core/src/types/thread/threadEvent.ts b/core/src/types/thread/threadEvent.ts new file mode 100644 index 000000000..4b19b09c1 --- /dev/null +++ b/core/src/types/thread/threadEvent.ts @@ -0,0 +1,4 @@ +export enum ThreadEvent { + /** The `OnThreadStarted` event is emitted when a thread is started. */ + OnThreadStarted = 'OnThreadStarted', +} diff --git a/docs/.env.example b/docs/.env.example index 6755f2520..b4a7fa5f1 100644 --- a/docs/.env.example +++ b/docs/.env.example @@ -1,5 +1,5 @@ GTM_ID=xxxx -POSTHOG_PROJECT_API_KEY=xxxx -POSTHOG_APP_URL=xxxx +UMAMI_PROJECT_API_KEY=xxxx +UMAMI_APP_URL=xxxx ALGOLIA_API_KEY=xxxx ALGOLIA_APP_ID=xxxx \ No newline at end of file diff --git a/docs/docs/docs/03-engineering/models.md b/docs/docs/docs/03-engineering/models.md index cb0b44f41..4e4c3c604 100644 --- a/docs/docs/docs/03-engineering/models.md +++ b/docs/docs/docs/03-engineering/models.md @@ -56,7 +56,6 @@ jan/ # Jan root folder - Each `model` folder contains a `model.json` file, which is a representation of a model. - `model.json` contains metadata and default parameters used to run a model. -- The only required field is `source_url`. ### Example @@ -64,36 +63,43 @@ Here's a standard example `model.json` for a GGUF model. 
```js { -"id": "zephyr-7b", // Defaults to foldername -"object": "model", // Defaults to "model" -"source_url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf", -"name": "Zephyr 7B", // Defaults to foldername -"owned_by": "you", // Defaults to "you" -"version": "1", // Defaults to 1 -"created": 1231231, // Defaults to file creation time -"description": null, // Defaults to null -"state": enum[null, "ready"] -"format": "ggufv3", // Defaults to "ggufv3" -"engine": "nitro", // engine_id specified in jan/engine folder -"engine_parameters": { // Engine parameters inside model.json can override - "ctx_len": 2048, // the value inside the base engine.json + "id": "zephyr-7b", // Defaults to foldername + "object": "model", // Defaults to "model" + "sources": [ + { + "filename": "zephyr-7b-beta.Q4_K_M.gguf", + "url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf" + } + ], + "name": "Zephyr 7B", // Defaults to foldername + "owned_by": "you", // Defaults to "you" + "version": "1", // Defaults to 1 + "created": 1231231, // Defaults to file creation time + "description": null, // Defaults to null + "format": "ggufv3", // Defaults to "ggufv3" + "engine": "nitro", // engine_id specified in jan/engine folder + "engine_parameters": { + // Engine parameters inside model.json can override + "ctx_len": 4096, // the value inside the base engine.json "ngl": 100, "embedding": true, - "n_parallel": 4, -}, -"model_parameters": { // Models are called parameters + "n_parallel": 4 + }, + "model_parameters": { + // Models are called parameters "stream": true, - "max_tokens": 2048, - "stop": [""], // This usually can be left blank, only used with specific need from model author + "max_tokens": 4096, + "stop": [""], // This usually can be left blank, only used with specific need from model author "frequency_penalty": 0, "presence_penalty": 0, "temperature": 0.7, "top_p": 0.95 -}, -"metadata": {}, // Defaults to {} -"assets": [ // Defaults to current dir - "file://.../zephyr-7b-q4_k_m.bin", -] + }, + "metadata": {}, // Defaults to {} + "assets": [ + // Defaults to current dir + "file://.../zephyr-7b-q4_k_m.bin" + ] } ``` diff --git a/docs/docs/guides/04-using-models/02-import-manually.mdx b/docs/docs/guides/04-using-models/02-import-manually.mdx index f5ecb5259..68142a8af 100644 --- a/docs/docs/guides/04-using-models/02-import-manually.mdx +++ b/docs/docs/guides/04-using-models/02-import-manually.mdx @@ -31,7 +31,6 @@ In this section, we will show you how to import a GGUF model from [HuggingFace]( ## Manually Importing a Downloaded Model (nightly versions and v0.4.4+) - ### 1. Create a Model Folder Navigate to the `~/jan/models` folder. You can find this folder by going to `App Settings` > `Advanced` > `Open App Directory`. @@ -92,7 +91,7 @@ Drag and drop your model binary into this folder, ensuring the `modelname.gguf` #### 3. Voila -If your model doesn't show up in the Model Selector in conversations, please restart the app. +If your model doesn't show up in the Model Selector in conversations, please restart the app. If that doesn't work, please feel free to join our [Discord community](https://discord.gg/Dt7MxDyNNZ) for support, updates, and discussions. @@ -190,14 +189,18 @@ Edit `model.json` and include the following configurations: - Ensure the filename must be `model.json`. - Ensure the `id` property matches the folder name you created. - Ensure the GGUF filename should match the `id` property exactly. 
-- Ensure the `source_url` property is the direct binary download link ending in `.gguf`. In HuggingFace, you can find the direct links in the `Files and versions` tab. +- Ensure the `source.url` property is the direct binary download link ending in `.gguf`. In HuggingFace, you can find the direct links in the `Files and versions` tab. - Ensure you are using the correct `prompt_template`. This is usually provided in the HuggingFace model's description page. -- Ensure the `state` property is set to `ready`. ```json title="model.json" { // highlight-start - "source_url": "https://huggingface.co/janhq/trinity-v1-GGUF/resolve/main/trinity-v1.Q4_K_M.gguf", + "sources": [ + { + "filename": "trinity-v1.Q4_K_M.gguf", + "url": "https://huggingface.co/janhq/trinity-v1-GGUF/resolve/main/trinity-v1.Q4_K_M.gguf" + } + ], "id": "trinity-v1-7b", // highlight-end "object": "model", @@ -208,7 +211,8 @@ Edit `model.json` and include the following configurations: "settings": { "ctx_len": 4096, // highlight-next-line - "prompt_template": "{system_message}\n### Instruction:\n{prompt}\n### Response:" + "prompt_template": "{system_message}\n### Instruction:\n{prompt}\n### Response:", + "llama_model_path": "trinity-v1.Q4_K_M.gguf" }, "parameters": { "max_tokens": 4096 @@ -218,9 +222,7 @@ Edit `model.json` and include the following configurations: "tags": ["7B", "Merged"], "size": 4370000000 }, - "engine": "nitro", - // highlight-next-line - "state": "ready" + "engine": "nitro" } ``` diff --git a/docs/docs/guides/04-using-models/03-integrate-with-remote-server.mdx b/docs/docs/guides/04-using-models/03-integrate-with-remote-server.mdx index 3ed15bc9f..533797fca 100644 --- a/docs/docs/guides/04-using-models/03-integrate-with-remote-server.mdx +++ b/docs/docs/guides/04-using-models/03-integrate-with-remote-server.mdx @@ -40,7 +40,12 @@ Navigate to the `~/jan/models` folder. Create a folder named `gpt-3.5-turbo-16k` ```json title="~/jan/models/gpt-3.5-turbo-16k/model.json" { - "source_url": "https://openai.com", + "sources": [ + { + "filename": "openai", + "url": "https://openai.com" + } + ], // highlight-next-line "id": "gpt-3.5-turbo-16k", "object": "model", @@ -55,8 +60,7 @@ Navigate to the `~/jan/models` folder. Create a folder named `gpt-3.5-turbo-16k` "author": "OpenAI", "tags": ["General", "Big Context Length"] }, - "engine": "openai", - "state": "ready" + "engine": "openai" // highlight-end } ``` @@ -118,7 +122,12 @@ Navigate to the `~/jan/models` folder. Create a folder named `mistral-ins-7b-q4` ```json title="~/jan/models/mistral-ins-7b-q4/model.json" { - "source_url": "https://jan.ai", + "sources": [ + { + "filename": "janai", + "url": "https://jan.ai" + } + ], // highlight-next-line "id": "mistral-ins-7b-q4", "object": "model", @@ -134,8 +143,7 @@ Navigate to the `~/jan/models` folder. Create a folder named `mistral-ins-7b-q4` "tags": ["remote", "awesome"] }, // highlight-start - "engine": "openai", - "state": "ready" + "engine": "openai" // highlight-end } ``` diff --git a/docs/docs/guides/07-integrations/02-integrate-openrouter.mdx b/docs/docs/guides/07-integrations/02-integrate-openrouter.mdx index 8623a1a4a..e0db0e336 100644 --- a/docs/docs/guides/07-integrations/02-integrate-openrouter.mdx +++ b/docs/docs/guides/07-integrations/02-integrate-openrouter.mdx @@ -49,7 +49,12 @@ Navigate to the `~/jan/models` folder. Create a folder named `} - A promise that resolves with the result. 
+ */ + ipcMain.handle( + AppRoute.isSubdirectory, + async (_event, from: string, to: string) => { + const relative = getRelative(from, to) + const isSubdir = + relative && !relative.startsWith('..') && !isAbsolute(relative) + + if (isSubdir === '') return false + else return isSubdir + } + ) + /** * Retrieve basename from given path, respect to the current OS. */ diff --git a/electron/handlers/fileManager.ts b/electron/handlers/fileManager.ts index f41286934..e328cb53b 100644 --- a/electron/handlers/fileManager.ts +++ b/electron/handlers/fileManager.ts @@ -1,4 +1,4 @@ -import { ipcMain } from 'electron' +import { ipcMain, app } from 'electron' // @ts-ignore import reflect from '@alumna/reflect' @@ -38,6 +38,10 @@ export function handleFileMangerIPCs() { getResourcePath() ) + ipcMain.handle(FileManagerRoute.getUserHomePath, async (_event) => + app.getPath('home') + ) + // handle fs is directory here ipcMain.handle( FileManagerRoute.fileStat, @@ -59,4 +63,20 @@ export function handleFileMangerIPCs() { return fileStat } ) + + ipcMain.handle( + FileManagerRoute.writeBlob, + async (_event, path: string, data: string): Promise => { + try { + const normalizedPath = normalizeFilePath(path) + const dataBuffer = Buffer.from(data, 'base64') + fs.writeFileSync( + join(getJanDataFolderPath(), normalizedPath), + dataBuffer + ) + } catch (err) { + console.error(`writeFile ${path} result: ${err}`) + } + } + ) } diff --git a/electron/handlers/fs.ts b/electron/handlers/fs.ts index 408a5fd10..34026b940 100644 --- a/electron/handlers/fs.ts +++ b/electron/handlers/fs.ts @@ -1,9 +1,9 @@ import { ipcMain } from 'electron' -import { FileSystemRoute } from '@janhq/core' -import { join } from 'path' import { getJanDataFolderPath, normalizeFilePath } from '@janhq/core/node' - +import fs from 'fs' +import { FileManagerRoute, FileSystemRoute } from '@janhq/core' +import { join } from 'path' /** * Handles file system operations. */ @@ -15,7 +15,7 @@ export function handleFsIPCs() { mdl[route]( ...args.map((arg) => typeof arg === 'string' && - (arg.includes(`file:/`) || arg.includes(`file:\\`)) + (arg.startsWith(`file:/`) || arg.startsWith(`file:\\`)) ? 
join(getJanDataFolderPath(), normalizeFilePath(arg)) : arg ) diff --git a/electron/main.ts b/electron/main.ts index fb7066cd0..5d7e59c0f 100644 --- a/electron/main.ts +++ b/electron/main.ts @@ -28,6 +28,22 @@ import { setupCore } from './utils/setup' app .whenReady() + .then(async () => { + if (!app.isPackaged) { + // Which means you're running from source code + const { default: installExtension, REACT_DEVELOPER_TOOLS } = await import( + 'electron-devtools-installer' + ) // Don't use import on top level, since the installer package is dev-only + try { + const name = installExtension(REACT_DEVELOPER_TOOLS) + console.log(`Added Extension: ${name}`) + } catch (err) { + console.log('An error occurred while installing devtools:') + console.error(err) + // Only log the error and don't throw it because it's not critical + } + } + }) .then(setupCore) .then(createUserSpace) .then(migrateExtensions) diff --git a/electron/package.json b/electron/package.json index 173e54f2b..2892fedc6 100644 --- a/electron/package.json +++ b/electron/package.json @@ -86,7 +86,7 @@ "request": "^2.88.2", "request-progress": "^3.0.0", "rimraf": "^5.0.5", - "typescript": "^5.3.3", + "typescript": "^5.2.2", "ulid": "^2.3.0", "use-debounce": "^9.0.4" }, @@ -99,6 +99,7 @@ "@typescript-eslint/parser": "^6.7.3", "electron": "28.0.0", "electron-builder": "^24.9.1", + "electron-devtools-installer": "^3.2.0", "electron-playwright-helpers": "^1.6.0", "eslint-plugin-react": "^7.33.2", "run-script-os": "^1.1.6" diff --git a/electron/playwright.config.ts b/electron/playwright.config.ts index 98b2c7b45..1fa3313f2 100644 --- a/electron/playwright.config.ts +++ b/electron/playwright.config.ts @@ -1,9 +1,9 @@ -import { PlaywrightTestConfig } from "@playwright/test"; +import { PlaywrightTestConfig } from '@playwright/test' const config: PlaywrightTestConfig = { - testDir: "./tests", + testDir: './tests', retries: 0, - timeout: 120000, -}; + globalTimeout: 300000, +} -export default config; +export default config diff --git a/electron/tests/explore.e2e.spec.ts b/electron/tests/hub.e2e.spec.ts similarity index 71% rename from electron/tests/explore.e2e.spec.ts rename to electron/tests/hub.e2e.spec.ts index 77eb3dbda..cc72e037e 100644 --- a/electron/tests/explore.e2e.spec.ts +++ b/electron/tests/hub.e2e.spec.ts @@ -9,6 +9,7 @@ import { let electronApp: ElectronApplication let page: Page +const TIMEOUT: number = parseInt(process.env.TEST_TIMEOUT || '300000') test.beforeAll(async () => { process.env.CI = 'e2e' @@ -26,7 +27,9 @@ test.beforeAll(async () => { }) await stubDialog(electronApp, 'showMessageBox', { response: 1 }) - page = await electronApp.firstWindow() + page = await electronApp.firstWindow({ + timeout: TIMEOUT, + }) }) test.afterAll(async () => { @@ -34,8 +37,12 @@ test.afterAll(async () => { await page.close() }) -test('explores models', async () => { - await page.getByTestId('Hub').first().click() - await page.getByTestId('testid-explore-models').isVisible() - // More test cases here... 
+test('explores hub', async () => { + test.setTimeout(TIMEOUT) + await page.getByTestId('Hub').first().click({ + timeout: TIMEOUT, + }) + await page.getByTestId('hub-container-test-id').isVisible({ + timeout: TIMEOUT, + }) }) diff --git a/electron/tests/main.e2e.spec.ts b/electron/tests/main.e2e.spec.ts deleted file mode 100644 index 1a5bfe696..000000000 --- a/electron/tests/main.e2e.spec.ts +++ /dev/null @@ -1,55 +0,0 @@ -import { _electron as electron } from 'playwright' -import { ElectronApplication, Page, expect, test } from '@playwright/test' - -import { - findLatestBuild, - parseElectronApp, - stubDialog, -} from 'electron-playwright-helpers' - -let electronApp: ElectronApplication -let page: Page - -test.beforeAll(async () => { - process.env.CI = 'e2e' - - const latestBuild = findLatestBuild('dist') - expect(latestBuild).toBeTruthy() - - // parse the packaged Electron app and find paths and other info - const appInfo = parseElectronApp(latestBuild) - expect(appInfo).toBeTruthy() - expect(appInfo.asar).toBe(true) - expect(appInfo.executable).toBeTruthy() - expect(appInfo.main).toBeTruthy() - expect(appInfo.name).toBe('jan') - expect(appInfo.packageJson).toBeTruthy() - expect(appInfo.packageJson.name).toBe('jan') - expect(appInfo.platform).toBeTruthy() - expect(appInfo.platform).toBe(process.platform) - expect(appInfo.resourcesDir).toBeTruthy() - - electronApp = await electron.launch({ - args: [appInfo.main], // main file from package.json - executablePath: appInfo.executable, // path to the Electron executable - }) - await stubDialog(electronApp, 'showMessageBox', { response: 1 }) - - page = await electronApp.firstWindow() -}) - -test.afterAll(async () => { - await electronApp.close() - await page.close() -}) - -test('renders the home page', async () => { - expect(page).toBeDefined() - - // Welcome text is available - const welcomeText = await page - .getByTestId('testid-welcome-title') - .first() - .isVisible() - expect(welcomeText).toBe(false) -}) diff --git a/electron/tests/navigation.e2e.spec.ts b/electron/tests/navigation.e2e.spec.ts index 2f4f7b767..5c8721c2f 100644 --- a/electron/tests/navigation.e2e.spec.ts +++ b/electron/tests/navigation.e2e.spec.ts @@ -9,6 +9,7 @@ import { let electronApp: ElectronApplication let page: Page +const TIMEOUT: number = parseInt(process.env.TEST_TIMEOUT || '300000') test.beforeAll(async () => { process.env.CI = 'e2e' @@ -26,7 +27,9 @@ test.beforeAll(async () => { }) await stubDialog(electronApp, 'showMessageBox', { response: 1 }) - page = await electronApp.firstWindow() + page = await electronApp.firstWindow({ + timeout: TIMEOUT, + }) }) test.afterAll(async () => { @@ -35,20 +38,24 @@ test.afterAll(async () => { }) test('renders left navigation panel', async () => { - // Chat section should be there - const chatSection = await page.getByTestId('Chat').first().isVisible() - expect(chatSection).toBe(false) - - // Home actions - /* Disable unstable feature tests - ** const botBtn = await page.getByTestId("Bot").first().isEnabled(); - ** Enable back when it is whitelisted - */ - + test.setTimeout(TIMEOUT) const systemMonitorBtn = await page .getByTestId('System Monitor') .first() - .isEnabled() - const settingsBtn = await page.getByTestId('Settings').first().isEnabled() + .isEnabled({ + timeout: TIMEOUT, + }) + const settingsBtn = await page + .getByTestId('Thread') + .first() + .isEnabled({ timeout: TIMEOUT }) expect([systemMonitorBtn, settingsBtn].filter((e) => !e).length).toBe(0) + // Chat section should be there + await page.getByTestId('Local 
API Server').first().click({ + timeout: TIMEOUT, + }) + const localServer = await page.getByTestId('local-server-testid').first() + await expect(localServer).toBeVisible({ + timeout: TIMEOUT, + }) }) diff --git a/electron/tests/settings.e2e.spec.ts b/electron/tests/settings.e2e.spec.ts index 798504c70..ad2d7b4a4 100644 --- a/electron/tests/settings.e2e.spec.ts +++ b/electron/tests/settings.e2e.spec.ts @@ -9,6 +9,7 @@ import { let electronApp: ElectronApplication let page: Page +const TIMEOUT: number = parseInt(process.env.TEST_TIMEOUT || '300000') test.beforeAll(async () => { process.env.CI = 'e2e' @@ -26,7 +27,9 @@ test.beforeAll(async () => { }) await stubDialog(electronApp, 'showMessageBox', { response: 1 }) - page = await electronApp.firstWindow() + page = await electronApp.firstWindow({ + timeout: TIMEOUT, + }) }) test.afterAll(async () => { @@ -35,6 +38,8 @@ test.afterAll(async () => { }) test('shows settings', async () => { - await page.getByTestId('Settings').first().click() - await page.getByTestId('testid-setting-description').isVisible() + test.setTimeout(TIMEOUT) + await page.getByTestId('Settings').first().click({ timeout: TIMEOUT }) + const settingDescription = page.getByTestId('testid-setting-description') + await expect(settingDescription).toBeVisible({ timeout: TIMEOUT }) }) diff --git a/electron/tests/system-monitor.e2e.spec.ts b/electron/tests/system-monitor.e2e.spec.ts deleted file mode 100644 index 747a8ae18..000000000 --- a/electron/tests/system-monitor.e2e.spec.ts +++ /dev/null @@ -1,41 +0,0 @@ -import { _electron as electron } from 'playwright' -import { ElectronApplication, Page, expect, test } from '@playwright/test' - -import { - findLatestBuild, - parseElectronApp, - stubDialog, -} from 'electron-playwright-helpers' - -let electronApp: ElectronApplication -let page: Page - -test.beforeAll(async () => { - process.env.CI = 'e2e' - - const latestBuild = findLatestBuild('dist') - expect(latestBuild).toBeTruthy() - - // parse the packaged Electron app and find paths and other info - const appInfo = parseElectronApp(latestBuild) - expect(appInfo).toBeTruthy() - - electronApp = await electron.launch({ - args: [appInfo.main], // main file from package.json - executablePath: appInfo.executable, // path to the Electron executable - }) - await stubDialog(electronApp, 'showMessageBox', { response: 1 }) - - page = await electronApp.firstWindow() -}) - -test.afterAll(async () => { - await electronApp.close() - await page.close() -}) - -test('shows system monitor', async () => { - await page.getByTestId('System Monitor').first().click() - await page.getByTestId('testid-system-monitor').isVisible() - // More test cases here... -}) diff --git a/extensions/assistant-extension/package.json b/extensions/assistant-extension/package.json index 4e84aa573..84bcdf47e 100644 --- a/extensions/assistant-extension/package.json +++ b/extensions/assistant-extension/package.json @@ -3,26 +3,50 @@ "version": "1.0.0", "description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models", "main": "dist/index.js", - "module": "dist/module.js", + "node": "dist/node/index.js", "author": "Jan ", "license": "AGPL-3.0", "scripts": { - "build": "tsc -b . 
&& webpack --config webpack.config.js", - "build:publish": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../electron/pre-install" + "build": "tsc --module commonjs && rollup -c rollup.config.ts", + "build:publish:linux": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../electron/pre-install", + "build:publish:darwin": "rimraf *.tgz --glob && npm run build && ../../.github/scripts/auto-sign.sh && npm pack && cpx *.tgz ../../electron/pre-install", + "build:publish:win32": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../electron/pre-install", + "build:publish": "run-script-os" }, "devDependencies": { + "@rollup/plugin-commonjs": "^25.0.7", + "@rollup/plugin-json": "^6.1.0", + "@rollup/plugin-node-resolve": "^15.2.3", + "@rollup/plugin-replace": "^5.0.5", + "@types/pdf-parse": "^1.1.4", + "cpx": "^1.5.0", "rimraf": "^3.0.2", - "webpack": "^5.88.2", - "webpack-cli": "^5.1.4" + "rollup": "^2.38.5", + "rollup-plugin-define": "^1.0.1", + "rollup-plugin-sourcemaps": "^0.6.3", + "rollup-plugin-typescript2": "^0.36.0", + "typescript": "^5.3.3", + "run-script-os": "^1.1.6" }, "dependencies": { "@janhq/core": "file:../../core", + "@langchain/community": "0.0.13", + "hnswlib-node": "^1.4.2", + "langchain": "^0.0.214", "path-browserify": "^1.0.1", + "pdf-parse": "^1.1.1", "ts-loader": "^9.5.0" }, "files": [ "dist/*", "package.json", "README.md" + ], + "bundleDependencies": [ + "@janhq/core", + "@langchain/community", + "hnswlib-node", + "langchain", + "pdf-parse" ] } diff --git a/extensions/assistant-extension/rollup.config.ts b/extensions/assistant-extension/rollup.config.ts new file mode 100644 index 000000000..7916ef9c8 --- /dev/null +++ b/extensions/assistant-extension/rollup.config.ts @@ -0,0 +1,81 @@ +import resolve from "@rollup/plugin-node-resolve"; +import commonjs from "@rollup/plugin-commonjs"; +import sourceMaps from "rollup-plugin-sourcemaps"; +import typescript from "rollup-plugin-typescript2"; +import json from "@rollup/plugin-json"; +import replace from "@rollup/plugin-replace"; + +const packageJson = require("./package.json"); + +const pkg = require("./package.json"); + +export default [ + { + input: `src/index.ts`, + output: [{ file: pkg.main, format: "es", sourcemap: true }], + // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash') + external: [], + watch: { + include: "src/**", + }, + plugins: [ + replace({ + NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), + EXTENSION_NAME: JSON.stringify(packageJson.name), + VERSION: JSON.stringify(packageJson.version), + }), + // Allow json resolution + json(), + // Compile TypeScript files + typescript({ useTsconfigDeclarationDir: true }), + // Compile TypeScript files + // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs) + commonjs(), + // Allow node_modules resolution, so you can use 'external' to control + // which external modules to include in the bundle + // https://github.com/rollup/rollup-plugin-node-resolve#usage + resolve({ + extensions: [".js", ".ts", ".svelte"], + }), + + // Resolve source maps to the original source + sourceMaps(), + ], + }, + { + input: `src/node/index.ts`, + output: [{ dir: "dist/node", format: "cjs", sourcemap: false }], + // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash') + external: [ + "@janhq/core/node", + "@langchain/community", + "langchain", + "langsmith", + "path", + "hnswlib-node", + ], + watch: { + include: "src/node/**", + }, + // 
inlineDynamicImports: true, + plugins: [ + // Allow json resolution + json(), + // Compile TypeScript files + typescript({ useTsconfigDeclarationDir: true }), + // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs) + commonjs({ + ignoreDynamicRequires: true, + }), + // Allow node_modules resolution, so you can use 'external' to control + // which external modules to include in the bundle + // https://github.com/rollup/rollup-plugin-node-resolve#usage + resolve({ + extensions: [".ts", ".js", ".json"], + }), + + // Resolve source maps to the original source + // sourceMaps(), + ], + }, +]; diff --git a/extensions/assistant-extension/src/@types/global.d.ts b/extensions/assistant-extension/src/@types/global.d.ts index 3b45ccc5a..dc11709a4 100644 --- a/extensions/assistant-extension/src/@types/global.d.ts +++ b/extensions/assistant-extension/src/@types/global.d.ts @@ -1 +1,3 @@ -declare const MODULE: string; +declare const NODE: string; +declare const EXTENSION_NAME: string; +declare const VERSION: string; diff --git a/extensions/assistant-extension/src/index.ts b/extensions/assistant-extension/src/index.ts index 96de33b7b..6495ea786 100644 --- a/extensions/assistant-extension/src/index.ts +++ b/extensions/assistant-extension/src/index.ts @@ -1,16 +1,151 @@ -import { fs, Assistant } from "@janhq/core"; -import { AssistantExtension } from "@janhq/core"; -import { join } from "path"; +import { + fs, + Assistant, + MessageRequest, + events, + InferenceEngine, + MessageEvent, + InferenceEvent, + joinPath, + executeOnMain, + AssistantExtension, +} from "@janhq/core"; export default class JanAssistantExtension extends AssistantExtension { private static readonly _homeDir = "file://assistants"; + controller = new AbortController(); + isCancelled = false; + retrievalThreadId: string | undefined = undefined; + async onLoad() { // making the assistant directory - if (!(await fs.existsSync(JanAssistantExtension._homeDir))) - fs.mkdirSync(JanAssistantExtension._homeDir).then(() => { - this.createJanAssistant(); - }); + const assistantDirExist = await fs.existsSync( + JanAssistantExtension._homeDir, + ); + if ( + localStorage.getItem(`${EXTENSION_NAME}-version`) !== VERSION || + !assistantDirExist + ) { + if (!assistantDirExist) + await fs.mkdirSync(JanAssistantExtension._homeDir); + + // Write assistant metadata + this.createJanAssistant(); + // Finished migration + localStorage.setItem(`${EXTENSION_NAME}-version`, VERSION); + } + + // Events subscription + events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => + JanAssistantExtension.handleMessageRequest(data, this), + ); + + events.on(InferenceEvent.OnInferenceStopped, () => { + JanAssistantExtension.handleInferenceStopped(this); + }); + } + + private static async handleInferenceStopped(instance: JanAssistantExtension) { + instance.isCancelled = true; + instance.controller?.abort(); + } + + private static async handleMessageRequest( + data: MessageRequest, + instance: JanAssistantExtension, + ) { + instance.isCancelled = false; + instance.controller = new AbortController(); + + if ( + data.model?.engine !== InferenceEngine.tool_retrieval_enabled || + !data.messages || + !data.thread?.assistants[0]?.tools + ) { + return; + } + + const latestMessage = data.messages[data.messages.length - 1]; + + // Ingest the document if needed + if ( + latestMessage && + latestMessage.content && + typeof latestMessage.content !== "string" + ) { + const docFile = latestMessage.content[1]?.doc_url?.url; + if (docFile) { + await 
executeOnMain( + NODE, + "toolRetrievalIngestNewDocument", + docFile, + data.model?.proxyEngine, + ); + } + } + + // Load agent on thread changed + if (instance.retrievalThreadId !== data.threadId) { + await executeOnMain(NODE, "toolRetrievalLoadThreadMemory", data.threadId); + + instance.retrievalThreadId = data.threadId; + + // Update the text splitter + await executeOnMain( + NODE, + "toolRetrievalUpdateTextSplitter", + data.thread.assistants[0].tools[0]?.settings?.chunk_size ?? 4000, + data.thread.assistants[0].tools[0]?.settings?.chunk_overlap ?? 200, + ); + } + + if (latestMessage.content) { + const prompt = + typeof latestMessage.content === "string" + ? latestMessage.content + : latestMessage.content[0].text; + // Retrieve the result + console.debug("toolRetrievalQuery", latestMessage.content); + const retrievalResult = await executeOnMain( + NODE, + "toolRetrievalQueryResult", + prompt, + ); + + // Update the message content + // Using the retrieval template with the result and query + if (data.thread?.assistants[0].tools) + data.messages[data.messages.length - 1].content = + data.thread.assistants[0].tools[0].settings?.retrieval_template + ?.replace("{CONTEXT}", retrievalResult) + .replace("{QUESTION}", prompt); + } + + // Filter out all the messages that are not text + data.messages = data.messages.map((message) => { + if ( + message.content && + typeof message.content !== "string" && + (message.content.length ?? 0) > 0 + ) { + return { + ...message, + content: [message.content[0]], + }; + } + return message; + }); + + // Reroute the result to inference engine + const output = { + ...data, + model: { + ...data.model, + engine: data.model.proxyEngine, + }, + }; + events.emit(MessageEvent.OnMessageSent, output); } /** @@ -19,15 +154,21 @@ export default class JanAssistantExtension extends AssistantExtension { onUnload(): void {} async createAssistant(assistant: Assistant): Promise { - const assistantDir = join(JanAssistantExtension._homeDir, assistant.id); + const assistantDir = await joinPath([ + JanAssistantExtension._homeDir, + assistant.id, + ]); if (!(await fs.existsSync(assistantDir))) await fs.mkdirSync(assistantDir); // store the assistant metadata json - const assistantMetadataPath = join(assistantDir, "assistant.json"); + const assistantMetadataPath = await joinPath([ + assistantDir, + "assistant.json", + ]); try { await fs.writeFileSync( assistantMetadataPath, - JSON.stringify(assistant, null, 2) + JSON.stringify(assistant, null, 2), ); } catch (err) { console.error(err); @@ -39,14 +180,17 @@ export default class JanAssistantExtension extends AssistantExtension { // get all the assistant metadata json const results: Assistant[] = []; const allFileName: string[] = await fs.readdirSync( - JanAssistantExtension._homeDir + JanAssistantExtension._homeDir, ); for (const fileName of allFileName) { - const filePath = join(JanAssistantExtension._homeDir, fileName); + const filePath = await joinPath([ + JanAssistantExtension._homeDir, + fileName, + ]); if (filePath.includes(".DS_Store")) continue; const jsonFiles: string[] = (await fs.readdirSync(filePath)).filter( - (file: string) => file === "assistant.json" + (file: string) => file === "assistant.json", ); if (jsonFiles.length !== 1) { @@ -55,8 +199,8 @@ export default class JanAssistantExtension extends AssistantExtension { } const content = await fs.readFileSync( - join(filePath, jsonFiles[0]), - "utf-8" + await joinPath([filePath, jsonFiles[0]]), + "utf-8", ); const assistant: Assistant = typeof content === "object" ? 
content : JSON.parse(content); @@ -73,7 +217,10 @@ export default class JanAssistantExtension extends AssistantExtension { } // remove the directory - const assistantDir = join(JanAssistantExtension._homeDir, assistant.id); + const assistantDir = await joinPath([ + JanAssistantExtension._homeDir, + assistant.id, + ]); await fs.rmdirSync(assistantDir); return Promise.resolve(); } @@ -89,7 +236,24 @@ export default class JanAssistantExtension extends AssistantExtension { description: "A default assistant that can use all downloaded models", model: "*", instructions: "", - tools: undefined, + tools: [ + { + type: "retrieval", + enabled: false, + settings: { + top_k: 2, + chunk_size: 1024, + chunk_overlap: 64, + retrieval_template: `Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. + ---------------- + CONTEXT: {CONTEXT} + ---------------- + QUESTION: {QUESTION} + ---------------- + Helpful Answer:`, + }, + }, + ], file_ids: [], metadata: undefined, }; diff --git a/extensions/assistant-extension/src/node/engine.ts b/extensions/assistant-extension/src/node/engine.ts new file mode 100644 index 000000000..54b2a6ba1 --- /dev/null +++ b/extensions/assistant-extension/src/node/engine.ts @@ -0,0 +1,13 @@ +import fs from "fs"; +import path from "path"; +import { getJanDataFolderPath } from "@janhq/core/node"; + +// Sec: Do not send engine settings over requests +// Read it manually instead +export const readEmbeddingEngine = (engineName: string) => { + const engineSettings = fs.readFileSync( + path.join(getJanDataFolderPath(), "engines", `${engineName}.json`), + "utf-8", + ); + return JSON.parse(engineSettings); +}; diff --git a/extensions/assistant-extension/src/node/index.ts b/extensions/assistant-extension/src/node/index.ts new file mode 100644 index 000000000..95a7243a4 --- /dev/null +++ b/extensions/assistant-extension/src/node/index.ts @@ -0,0 +1,39 @@ +import { getJanDataFolderPath, normalizeFilePath } from "@janhq/core/node"; +import { Retrieval } from "./tools/retrieval"; +import path from "path"; + +const retrieval = new Retrieval(); + +export async function toolRetrievalUpdateTextSplitter( + chunkSize: number, + chunkOverlap: number, +) { + retrieval.updateTextSplitter(chunkSize, chunkOverlap); + return Promise.resolve(); +} +export async function toolRetrievalIngestNewDocument( + file: string, + engine: string, +) { + const filePath = path.join(getJanDataFolderPath(), normalizeFilePath(file)); + const threadPath = path.dirname(filePath.replace("files", "")); + retrieval.updateEmbeddingEngine(engine); + await retrieval.ingestAgentKnowledge(filePath, `${threadPath}/memory`); + return Promise.resolve(); +} + +export async function toolRetrievalLoadThreadMemory(threadId: string) { + try { + await retrieval.loadRetrievalAgent( + path.join(getJanDataFolderPath(), "threads", threadId, "memory"), + ); + return Promise.resolve(); + } catch (err) { + console.debug(err); + } +} + +export async function toolRetrievalQueryResult(query: string) { + const res = await retrieval.generateResult(query); + return Promise.resolve(res); +} diff --git a/extensions/assistant-extension/src/node/tools/retrieval/index.ts b/extensions/assistant-extension/src/node/tools/retrieval/index.ts new file mode 100644 index 000000000..cd7e9abb1 --- /dev/null +++ b/extensions/assistant-extension/src/node/tools/retrieval/index.ts @@ -0,0 +1,79 @@ +import { RecursiveCharacterTextSplitter } from 
"langchain/text_splitter"; +import { formatDocumentsAsString } from "langchain/util/document"; +import { PDFLoader } from "langchain/document_loaders/fs/pdf"; + +import { HNSWLib } from "langchain/vectorstores/hnswlib"; + +import { OpenAIEmbeddings } from "langchain/embeddings/openai"; +import { readEmbeddingEngine } from "../../engine"; + +export class Retrieval { + public chunkSize: number = 100; + public chunkOverlap?: number = 0; + private retriever: any; + + private embeddingModel?: OpenAIEmbeddings = undefined; + private textSplitter?: RecursiveCharacterTextSplitter; + + constructor(chunkSize: number = 4000, chunkOverlap: number = 200) { + this.updateTextSplitter(chunkSize, chunkOverlap); + } + + public updateTextSplitter(chunkSize: number, chunkOverlap: number): void { + this.chunkSize = chunkSize; + this.chunkOverlap = chunkOverlap; + this.textSplitter = new RecursiveCharacterTextSplitter({ + chunkSize: chunkSize, + chunkOverlap: chunkOverlap, + }); + } + + public updateEmbeddingEngine(engine: string): void { + // Engine settings are not compatible with the current embedding model params + // Switch case manually for now + const settings = readEmbeddingEngine(engine); + if (engine === "nitro") { + this.embeddingModel = new OpenAIEmbeddings( + { openAIApiKey: "nitro-embedding" }, + { basePath: "http://127.0.0.1:3928/v1" } + ); + } else { + // Fallback to OpenAI Settings + this.embeddingModel = new OpenAIEmbeddings({ + configuration: { + apiKey: settings.api_key, + }, + }); + } + } + + public ingestAgentKnowledge = async ( + filePath: string, + memoryPath: string + ): Promise => { + const loader = new PDFLoader(filePath, { + splitPages: true, + }); + if (!this.embeddingModel) return Promise.reject(); + const doc = await loader.load(); + const docs = await this.textSplitter!.splitDocuments(doc); + const vectorStore = await HNSWLib.fromDocuments(docs, this.embeddingModel); + return vectorStore.save(memoryPath); + }; + + public loadRetrievalAgent = async (memoryPath: string): Promise => { + if (!this.embeddingModel) return Promise.reject(); + const vectorStore = await HNSWLib.load(memoryPath, this.embeddingModel); + this.retriever = vectorStore.asRetriever(2); + return Promise.resolve(); + }; + + public generateResult = async (query: string): Promise => { + if (!this.retriever) { + return Promise.resolve(" "); + } + const relevantDocs = await this.retriever.getRelevantDocuments(query); + const serializedDoc = formatDocumentsAsString(relevantDocs); + return Promise.resolve(serializedDoc); + }; +} diff --git a/extensions/assistant-extension/tsconfig.json b/extensions/assistant-extension/tsconfig.json index 2477d58ce..d3794cace 100644 --- a/extensions/assistant-extension/tsconfig.json +++ b/extensions/assistant-extension/tsconfig.json @@ -1,14 +1,20 @@ { "compilerOptions": { - "target": "es2016", - "module": "ES6", "moduleResolution": "node", - "outDir": "./dist", - "esModuleInterop": true, - "forceConsistentCasingInFileNames": true, - "strict": false, + "target": "es5", + "module": "ES2020", + "lib": ["es2015", "es2016", "es2017", "dom"], + "strict": true, + "sourceMap": true, + "declaration": true, + "allowSyntheticDefaultImports": true, + "experimentalDecorators": true, + "emitDecoratorMetadata": true, + "declarationDir": "dist/types", + "outDir": "dist", + "importHelpers": true, + "typeRoots": ["node_modules/@types"], "skipLibCheck": true, - "rootDir": "./src" }, - "include": ["./src"] + "include": ["src"], } diff --git a/extensions/assistant-extension/webpack.config.js 
b/extensions/assistant-extension/webpack.config.js deleted file mode 100644 index 74d16fc8e..000000000 --- a/extensions/assistant-extension/webpack.config.js +++ /dev/null @@ -1,38 +0,0 @@ -const path = require("path"); -const webpack = require("webpack"); -const packageJson = require("./package.json"); - -module.exports = { - experiments: { outputModule: true }, - entry: "./src/index.ts", // Adjust the entry point to match your project's main file - mode: "production", - module: { - rules: [ - { - test: /\.tsx?$/, - use: "ts-loader", - exclude: /node_modules/, - }, - ], - }, - output: { - filename: "index.js", // Adjust the output file name as needed - path: path.resolve(__dirname, "dist"), - library: { type: "module" }, // Specify ESM output format - }, - plugins: [ - new webpack.DefinePlugin({ - MODULE: JSON.stringify(`${packageJson.name}/${packageJson.module}`), - }), - ], - resolve: { - extensions: [".ts", ".js"], - fallback: { - path: require.resolve("path-browserify"), - }, - }, - optimization: { - minimize: false, - }, - // Add loaders and other configuration as needed for your project -}; diff --git a/extensions/conversational-extension/src/index.ts b/extensions/conversational-extension/src/index.ts index 66becb748..3d28a9c1d 100644 --- a/extensions/conversational-extension/src/index.ts +++ b/extensions/conversational-extension/src/index.ts @@ -4,15 +4,14 @@ import { ConversationalExtension, Thread, ThreadMessage, + events, } from '@janhq/core' /** * JSONConversationalExtension is a ConversationalExtension implementation that provides * functionality for managing threads. */ -export default class JSONConversationalExtension - extends ConversationalExtension -{ +export default class JSONConversationalExtension extends ConversationalExtension { private static readonly _homeDir = 'file://threads' private static readonly _threadInfoFileName = 'thread.json' private static readonly _threadMessagesFileName = 'messages.jsonl' @@ -119,6 +118,33 @@ export default class JSONConversationalExtension ]) if (!(await fs.existsSync(threadDirPath))) await fs.mkdirSync(threadDirPath) + + if (message.content[0]?.type === 'image') { + const filesPath = await joinPath([threadDirPath, 'files']) + if (!(await fs.existsSync(filesPath))) await fs.mkdirSync(filesPath) + + const imagePath = await joinPath([filesPath, `${message.id}.png`]) + const base64 = message.content[0].text.annotations[0] + await this.storeImage(base64, imagePath) + if ((await fs.existsSync(imagePath)) && message.content?.length) { + // Use file path instead of blob + message.content[0].text.annotations[0] = `threads/${message.thread_id}/files/${message.id}.png` + } + } + + if (message.content[0]?.type === 'pdf') { + const filesPath = await joinPath([threadDirPath, 'files']) + if (!(await fs.existsSync(filesPath))) await fs.mkdirSync(filesPath) + + const filePath = await joinPath([filesPath, `${message.id}.pdf`]) + const blob = message.content[0].text.annotations[0] + await this.storeFile(blob, filePath) + + if ((await fs.existsSync(filePath)) && message.content?.length) { + // Use file path instead of blob + message.content[0].text.annotations[0] = `threads/${message.thread_id}/files/${message.id}.pdf` + } + } await fs.appendFileSync(threadMessagePath, JSON.stringify(message) + '\n') Promise.resolve() } catch (err) { @@ -126,6 +152,25 @@ export default class JSONConversationalExtension } } + async storeImage(base64: string, filePath: string): Promise { + const base64Data = base64.replace(/^data:image\/\w+;base64,/, '') + + try { + 
await fs.writeBlob(filePath, base64Data) + } catch (err) { + console.error(err) + } + } + + async storeFile(base64: string, filePath: string): Promise { + const base64Data = base64.replace(/^data:application\/pdf;base64,/, '') + try { + await fs.writeBlob(filePath, base64Data) + } catch (err) { + console.error(err) + } + } + async writeMessages( threadId: string, messages: ThreadMessage[] @@ -229,7 +274,11 @@ export default class JSONConversationalExtension const messages: ThreadMessage[] = [] result.forEach((line: string) => { - messages.push(JSON.parse(line) as ThreadMessage) + try { + messages.push(JSON.parse(line) as ThreadMessage) + } catch (err) { + console.error(err) + } }) return messages } catch (err) { diff --git a/extensions/inference-nitro-extension/bin/version.txt b/extensions/inference-nitro-extension/bin/version.txt index f2722b133..c2c0004f0 100644 --- a/extensions/inference-nitro-extension/bin/version.txt +++ b/extensions/inference-nitro-extension/bin/version.txt @@ -1 +1 @@ -0.2.12 +0.3.5 diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json index 9379e194b..8ad516ad9 100644 --- a/extensions/inference-nitro-extension/package.json +++ b/extensions/inference-nitro-extension/package.json @@ -35,11 +35,12 @@ "rollup-plugin-sourcemaps": "^0.6.3", "rollup-plugin-typescript2": "^0.36.0", "run-script-os": "^1.1.6", - "typescript": "^5.3.3" + "typescript": "^5.2.2" }, "dependencies": { "@janhq/core": "file:../../core", "@rollup/plugin-replace": "^5.0.5", + "@types/os-utils": "^0.0.4", "fetch-retry": "^5.0.6", "path-browserify": "^1.0.1", "rxjs": "^7.8.1", diff --git a/extensions/inference-nitro-extension/src/@types/global.d.ts b/extensions/inference-nitro-extension/src/@types/global.d.ts index 5fb41f0f8..bc126337f 100644 --- a/extensions/inference-nitro-extension/src/@types/global.d.ts +++ b/extensions/inference-nitro-extension/src/@types/global.d.ts @@ -2,22 +2,6 @@ declare const NODE: string; declare const INFERENCE_URL: string; declare const TROUBLESHOOTING_URL: string; -/** - * The parameters for the initModel function. - * @property settings - The settings for the machine learning model. - * @property settings.ctx_len - The context length. - * @property settings.ngl - The number of generated tokens. - * @property settings.cont_batching - Whether to use continuous batching. - * @property settings.embedding - Whether to use embedding. - */ -interface EngineSettings { - ctx_len: number; - ngl: number; - cpu_threads: number; - cont_batching: boolean; - embedding: boolean; -} - /** * The response from the initModel function. * @property error - An error message if the model fails to load. 
@@ -26,8 +10,3 @@ interface ModelOperationResponse { error?: any; modelFile?: string; } - -interface ResourcesInfo { - numCpuPhysicalCore: number; - memAvailable: number; -} \ No newline at end of file diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts index 735383a61..9f1f00263 100644 --- a/extensions/inference-nitro-extension/src/index.ts +++ b/extensions/inference-nitro-extension/src/index.ts @@ -24,6 +24,7 @@ import { MessageEvent, ModelEvent, InferenceEvent, + ModelSettingParams, } from "@janhq/core"; import { requestInference } from "./helpers/sse"; import { ulid } from "ulid"; @@ -45,12 +46,12 @@ export default class JanInferenceNitroExtension extends InferenceExtension { private _currentModel: Model | undefined; - private _engineSettings: EngineSettings = { + private _engineSettings: ModelSettingParams = { ctx_len: 2048, ngl: 100, cpu_threads: 1, cont_batching: false, - embedding: false, + embedding: true, }; controller = new AbortController(); @@ -83,19 +84,19 @@ export default class JanInferenceNitroExtension extends InferenceExtension { // Events subscription events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => - this.onMessageRequest(data) + this.onMessageRequest(data), ); events.on(ModelEvent.OnModelInit, (model: Model) => - this.onModelInit(model) + this.onModelInit(model), ); events.on(ModelEvent.OnModelStop, (model: Model) => - this.onModelStop(model) + this.onModelStop(model), ); events.on(InferenceEvent.OnInferenceStopped, () => - this.onInferenceStopped() + this.onInferenceStopped(), ); // Attempt to fetch nvidia info @@ -120,7 +121,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension { } else { await fs.writeFileSync( engineFile, - JSON.stringify(this._engineSettings, null, 2) + JSON.stringify(this._engineSettings, null, 2), ); } } catch (err) { @@ -133,6 +134,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension { const modelFullPath = await joinPath(["models", model.id]); + this._currentModel = model; const nitroInitResult = await executeOnMain(NODE, "runModel", { modelFullPath, model, @@ -143,12 +145,11 @@ export default class JanInferenceNitroExtension extends InferenceExtension { return; } - this._currentModel = model; events.emit(ModelEvent.OnModelReady, model); this.getNitroProcesHealthIntervalId = setInterval( () => this.periodicallyGetNitroHealth(), - JanInferenceNitroExtension._intervalHealthCheck + JanInferenceNitroExtension._intervalHealthCheck, ); } @@ -225,6 +226,9 @@ export default class JanInferenceNitroExtension extends InferenceExtension { */ private async onMessageRequest(data: MessageRequest) { if (data.model?.engine !== InferenceEngine.nitro || !this._currentModel) { + console.log( + `Model is not nitro or no model loaded ${data.model?.engine} ${this._currentModel}` + ); return; } diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts index 0a7a2e33e..7ba90b556 100644 --- a/extensions/inference-nitro-extension/src/node/index.ts +++ b/extensions/inference-nitro-extension/src/node/index.ts @@ -3,11 +3,19 @@ import path from "path"; import { ChildProcessWithoutNullStreams, spawn } from "child_process"; import tcpPortUsed from "tcp-port-used"; import fetchRT from "fetch-retry"; -import { log, getJanDataFolderPath } from "@janhq/core/node"; +import { + log, + getJanDataFolderPath, + getSystemResourceInfo, +} from "@janhq/core/node"; import { 
getNitroProcessInfo, updateNvidiaInfo } from "./nvidia"; -import { Model, InferenceEngine, ModelSettingParams } from "@janhq/core"; +import { + Model, + InferenceEngine, + ModelSettingParams, + PromptTemplate, +} from "@janhq/core"; import { executableNitroFile } from "./execute"; -import { physicalCpuCount } from "./utils"; // Polyfill fetch with retry const fetchRetry = fetchRT(fetch); @@ -19,25 +27,6 @@ interface ModelInitOptions { modelFullPath: string; model: Model; } - -/** - * The response object of Prompt Template parsing. - */ -interface PromptTemplate { - system_prompt?: string; - ai_prompt?: string; - user_prompt?: string; - error?: string; -} - -/** - * Model setting args for Nitro model load. - */ -interface ModelSettingArgs extends ModelSettingParams { - llama_model_path: string; - cpu_threads: number; -} - // The PORT to use for the Nitro subprocess const PORT = 3928; // The HOST address to use for the Nitro subprocess @@ -60,7 +49,7 @@ let subprocess: ChildProcessWithoutNullStreams | undefined = undefined; // The current model file url let currentModelFile: string = ""; // The current model settings -let currentSettings: ModelSettingArgs | undefined = undefined; +let currentSettings: ModelSettingParams | undefined = undefined; /** * Stops a Nitro subprocess. @@ -78,7 +67,7 @@ function stopModel(): Promise { * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package */ async function runModel( - wrapper: ModelInitOptions + wrapper: ModelInitOptions, ): Promise { if (wrapper.model.engine !== InferenceEngine.nitro) { // Not a nitro model @@ -96,7 +85,7 @@ async function runModel( const ggufBinFile = files.find( (file) => file === path.basename(currentModelFile) || - file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT) + file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT), ); if (!ggufBinFile) return Promise.reject("No GGUF model file found"); @@ -106,7 +95,7 @@ async function runModel( if (wrapper.model.engine !== InferenceEngine.nitro) { return Promise.reject("Not a nitro model"); } else { - const nitroResourceProbe = await getResourcesInfo(); + const nitroResourceProbe = await getSystemResourceInfo(); // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt if (wrapper.model.settings.prompt_template) { const promptTemplate = wrapper.model.settings.prompt_template; @@ -119,13 +108,20 @@ async function runModel( wrapper.model.settings.ai_prompt = prompt.ai_prompt; } + const modelFolderPath = path.join(janRoot, "models", wrapper.model.id); + const modelPath = wrapper.model.settings.llama_model_path + ? 
path.join(modelFolderPath, wrapper.model.settings.llama_model_path) + : currentModelFile; + currentSettings = { - llama_model_path: currentModelFile, ...wrapper.model.settings, + llama_model_path: modelPath, // This is critical and requires real CPU physical core count (or performance core) cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore), + ...(wrapper.model.settings.mmproj && { + mmproj: path.join(modelFolderPath, wrapper.model.settings.mmproj), + }), }; - console.log(currentSettings); return runNitroAndLoadModel(); } } @@ -184,10 +180,10 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate { const system_prompt = promptTemplate.substring(0, systemIndex); const user_prompt = promptTemplate.substring( systemIndex + systemMarker.length, - promptIndex + promptIndex, ); const ai_prompt = promptTemplate.substring( - promptIndex + promptMarker.length + promptIndex + promptMarker.length, ); // Return the split parts @@ -197,7 +193,7 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate { const promptIndex = promptTemplate.indexOf(promptMarker); const user_prompt = promptTemplate.substring(0, promptIndex); const ai_prompt = promptTemplate.substring( - promptIndex + promptMarker.length + promptIndex + promptMarker.length, ); // Return the split parts @@ -213,6 +209,9 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate { * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load. */ function loadLLMModel(settings: any): Promise { + if (!settings?.ngl) { + settings.ngl = 100; + } log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`); return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, { method: "POST", @@ -226,14 +225,14 @@ function loadLLMModel(settings: any): Promise { .then((res) => { log( `[NITRO]::Debug: Load model success with response ${JSON.stringify( - res - )}` + res, + )}`, ); return Promise.resolve(res); }) .catch((err) => { log(`[NITRO]::Error: Load model failed with error ${err}`); - return Promise.reject(); + return Promise.reject(err); }); } @@ -255,8 +254,8 @@ async function validateModelStatus(): Promise { retryDelay: 500, }).then(async (res: Response) => { log( - `[NITRO]::Debug: Validate model state success with response ${JSON.stringify( - res + `[NITRO]::Debug: Validate model state with response ${JSON.stringify( + res.status )}` ); // If the response is OK, check model_loaded status. @@ -265,9 +264,19 @@ async function validateModelStatus(): Promise { // If the model is loaded, return an empty object. // Otherwise, return an object with an error message. 
if (body.model_loaded) { + log( + `[NITRO]::Debug: Validate model state success with response ${JSON.stringify( + body + )}` + ); return Promise.resolve(); } } + log( + `[NITRO]::Debug: Validate model state failed with response ${JSON.stringify( + res.statusText + )}` + ); return Promise.reject("Validate model status failed"); }); } @@ -308,7 +317,7 @@ function spawnNitroProcess(): Promise { const args: string[] = ["1", LOCAL_HOST, PORT.toString()]; // Execute the binary log( - `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}` + `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`, ); subprocess = spawn( executableOptions.executablePath, @@ -319,7 +328,7 @@ function spawnNitroProcess(): Promise { ...process.env, CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices, }, - } + }, ); // Handle subprocess output @@ -344,22 +353,6 @@ function spawnNitroProcess(): Promise { }); } -/** - * Get the system resources information - * TODO: Move to Core so that it can be reused - */ -function getResourcesInfo(): Promise { - return new Promise(async (resolve) => { - const cpu = await physicalCpuCount(); - log(`[NITRO]::CPU informations - ${cpu}`); - const response: ResourcesInfo = { - numCpuPhysicalCore: cpu, - memAvailable: 0, - }; - resolve(response); - }); -} - /** * Every module should have a dispose function * This will be called when the extension is unloaded and should clean up any resources diff --git a/extensions/inference-nitro-extension/src/node/utils.ts b/extensions/inference-nitro-extension/src/node/utils.ts deleted file mode 100644 index c7ef2e9a6..000000000 --- a/extensions/inference-nitro-extension/src/node/utils.ts +++ /dev/null @@ -1,56 +0,0 @@ -import os from "os"; -import childProcess from "child_process"; - -function exec(command: string): Promise { - return new Promise((resolve, reject) => { - childProcess.exec(command, { encoding: "utf8" }, (error, stdout) => { - if (error) { - reject(error); - } else { - resolve(stdout); - } - }); - }); -} - -let amount: number; -const platform = os.platform(); - -export async function physicalCpuCount(): Promise { - return new Promise((resolve, reject) => { - if (platform === "linux") { - exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l') - .then((output) => { - amount = parseInt(output.trim(), 10); - resolve(amount); - }) - .catch(reject); - } else if (platform === "darwin") { - exec("sysctl -n hw.physicalcpu_max") - .then((output) => { - amount = parseInt(output.trim(), 10); - resolve(amount); - }) - .catch(reject); - } else if (platform === "win32") { - exec("WMIC CPU Get NumberOfCores") - .then((output) => { - amount = output - .split(os.EOL) - .map((line: string) => parseInt(line)) - .filter((value: number) => !isNaN(value)) - .reduce((sum: number, number: number) => sum + number, 1); - resolve(amount); - }) - .catch(reject); - } else { - const cores = os.cpus().filter((cpu: any, index: number) => { - const hasHyperthreading = cpu.model.includes("Intel"); - const isOdd = index % 2 === 1; - return !hasHyperthreading || isOdd; - }); - amount = cores.length; - resolve(amount); - } - }); -} diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts index 0b53d7c21..fd1230bc7 100644 --- a/extensions/inference-openai-extension/src/index.ts +++ b/extensions/inference-openai-extension/src/index.ts @@ -15,6 +15,7 @@ import { ThreadMessage, events, fs, + InferenceEngine, BaseExtension, 
MessageEvent, ModelEvent, @@ -114,7 +115,7 @@ export default class JanInferenceOpenAIExtension extends BaseExtension { } } private static async handleModelInit(model: OpenAIModel) { - if (model.engine !== "openai") { + if (model.engine !== InferenceEngine.openai) { return; } else { JanInferenceOpenAIExtension._currentModel = model; diff --git a/extensions/inference-openai-extension/tsconfig.json b/extensions/inference-openai-extension/tsconfig.json index b48175a16..7bfdd9009 100644 --- a/extensions/inference-openai-extension/tsconfig.json +++ b/extensions/inference-openai-extension/tsconfig.json @@ -3,13 +3,12 @@ "target": "es2016", "module": "ES6", "moduleResolution": "node", - "outDir": "./dist", "esModuleInterop": true, "forceConsistentCasingInFileNames": true, "strict": false, "skipLibCheck": true, - "rootDir": "./src" + "rootDir": "./src", }, - "include": ["./src"] + "include": ["./src"], } diff --git a/extensions/inference-triton-trtllm-extension/tsconfig.json b/extensions/inference-triton-trtllm-extension/tsconfig.json index b48175a16..7bfdd9009 100644 --- a/extensions/inference-triton-trtllm-extension/tsconfig.json +++ b/extensions/inference-triton-trtllm-extension/tsconfig.json @@ -3,13 +3,12 @@ "target": "es2016", "module": "ES6", "moduleResolution": "node", - "outDir": "./dist", "esModuleInterop": true, "forceConsistentCasingInFileNames": true, "strict": false, "skipLibCheck": true, - "rootDir": "./src" + "rootDir": "./src", }, - "include": ["./src"] + "include": ["./src"], } diff --git a/extensions/model-extension/package.json b/extensions/model-extension/package.json index 376e724b4..86f177d14 100644 --- a/extensions/model-extension/package.json +++ b/extensions/model-extension/package.json @@ -1,6 +1,6 @@ { "name": "@janhq/model-extension", - "version": "1.0.22", + "version": "1.0.23", "description": "Model Management Extension provides model exploration and seamless downloads", "main": "dist/index.js", "module": "dist/module.js", diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts index f41999bd8..5640177a0 100644 --- a/extensions/model-extension/src/index.ts +++ b/extensions/model-extension/src/index.ts @@ -80,16 +80,34 @@ export default class JanModelExtension extends ModelExtension { const modelDirPath = await joinPath([JanModelExtension._homeDir, model.id]) if (!(await fs.existsSync(modelDirPath))) await fs.mkdirSync(modelDirPath) - // try to retrieve the download file name from the source url - // if it fails, use the model ID as the file name - const extractedFileName = await model.source_url.split('/').pop() + if (model.sources.length > 1) { + // path to model binaries + for (const source of model.sources) { + let path = this.extractFileName(source.url) + if (source.filename) { + path = await joinPath([modelDirPath, source.filename]) + } + + downloadFile(source.url, path, network) + } + } else { + const fileName = this.extractFileName(model.sources[0]?.url) + const path = await joinPath([modelDirPath, fileName]) + downloadFile(model.sources[0]?.url, path, network) + } + } + + /** + * try to retrieve the download file name from the source url + */ + private extractFileName(url: string): string { + const extractedFileName = url.split('/').pop() const fileName = extractedFileName .toLowerCase() .endsWith(JanModelExtension._supportedModelFormat) ? 
extractedFileName - : model.id - const path = await joinPath([modelDirPath, fileName]) - downloadFile(model.source_url, path, network) + : extractedFileName + JanModelExtension._supportedModelFormat + return fileName } /** @@ -98,6 +116,7 @@ export default class JanModelExtension extends ModelExtension { * @returns {Promise} A promise that resolves when the download has been cancelled. */ async cancelModelDownload(modelId: string): Promise { + const model = await this.getConfiguredModels() return abortDownload( await joinPath([JanModelExtension._homeDir, modelId, modelId]) ).then(async () => { @@ -163,15 +182,16 @@ export default class JanModelExtension extends ModelExtension { .then((files: string[]) => { // or model binary exists in the directory // model binary name can match model ID or be a .gguf file and not be an incompleted model file + // TODO: Check diff between urls, filenames return ( files.includes(modelDir) || - files.some( + files.filter( (file) => file .toLowerCase() .includes(JanModelExtension._supportedModelFormat) && !file.endsWith(JanModelExtension._incompletedModelFileName) - ) + )?.length >= model.sources.length ) }) } @@ -198,7 +218,6 @@ export default class JanModelExtension extends ModelExtension { const readJsonPromises = allDirectories.map(async (dirName) => { // filter out directories that don't match the selector - // read model.json const jsonPath = await joinPath([ JanModelExtension._homeDir, @@ -226,7 +245,21 @@ export default class JanModelExtension extends ModelExtension { const modelData = results.map((result) => { if (result.status === 'fulfilled') { try { - return result.value as Model + // This to ensure backward compatibility with `model.json` with `source_url` + const tmpModel = + typeof result.value === 'object' + ? result.value + : JSON.parse(result.value) + if (tmpModel['source_url'] != null) { + tmpModel['source'] = [ + { + filename: tmpModel.id, + url: tmpModel['source_url'], + }, + ] + } + + return tmpModel as Model } catch { console.debug(`Unable to parse model metadata: ${result.value}`) return undefined diff --git a/extensions/monitoring-extension/src/index.ts b/extensions/monitoring-extension/src/index.ts index d3f20b437..9297a770f 100644 --- a/extensions/monitoring-extension/src/index.ts +++ b/extensions/monitoring-extension/src/index.ts @@ -1,5 +1,4 @@ -import { MonitoringExtension } from "@janhq/core"; -import { executeOnMain } from "@janhq/core"; +import { MonitoringExtension, executeOnMain } from "@janhq/core"; /** * JanMonitoringExtension is a extension that provides system monitoring functionality. 
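Note: the model manifests from this point on replace the legacy single `source_url` string with a `sources` array (each entry carrying a `url` and, for multi-file models, a `filename`); vision models additionally point `settings.llama_model_path` at the main GGUF file and `settings.mmproj` at the projector file. The short TypeScript sketch below only illustrates how a consumer might normalize an old-style manifest into the new shape; the `ModelSource` and `LegacyOrNewModel` names are invented for this example and are not part of the patch.

// Illustrative sketch (not part of the patch): normalize a legacy manifest
// that still uses `source_url` into the `sources` array used by the new
// model.json files below.
interface ModelSource {
  url: string
  filename?: string
}

interface LegacyOrNewModel {
  id: string
  source_url?: string
  sources?: ModelSource[]
}

function normalizeSources(model: LegacyOrNewModel): ModelSource[] {
  // New-style manifests already declare their files explicitly.
  if (model.sources && model.sources.length > 0) return model.sources
  // Legacy manifests expose a single URL; fall back to the model id as the
  // file name, similar to the backward-compatibility branch in model-extension.
  if (model.source_url) return [{ url: model.source_url, filename: model.id }]
  return []
}

For instance, the bakllava-1 manifest that follows declares two sources (the main GGUF plus an `mmproj` projector), which is why the download and completeness checks in model-extension above iterate over `model.sources` rather than a single URL.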
diff --git a/models/bakllava-1/model.json b/models/bakllava-1/model.json new file mode 100644 index 000000000..91f6f4136 --- /dev/null +++ b/models/bakllava-1/model.json @@ -0,0 +1,33 @@ +{ + "sources": [ + { + "filename": "ggml-model-q5_k.gguf", + "url": "https://huggingface.co/mys/ggml_bakllava-1/resolve/main/ggml-model-q5_k.gguf" + }, + { + "filename": "mmproj-model-f16.gguf", + "url": "https://huggingface.co/mys/ggml_bakllava-1/resolve/main/mmproj-model-f16.gguf" + } + ], + "id": "bakllava-1", + "object": "model", + "name": "BakLlava 1", + "version": "1.0", + "description": "BakLlava 1 can bring vision understanding to Jan", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n", + "llama_model_path": "ggml-model-q5_k.gguf", + "mmproj": "mmproj-model-f16.gguf" + }, + "parameters": { + "max_tokens": 4096 + }, + "metadata": { + "author": "Mys", + "tags": ["Vision"], + "size": 5750000000 + }, + "engine": "nitro" +} diff --git a/models/capybara-34b/model.json b/models/capybara-34b/model.json index ffca28c6d..af029bbb0 100644 --- a/models/capybara-34b/model.json +++ b/models/capybara-34b/model.json @@ -1,29 +1,34 @@ { - "source_url": "https://huggingface.co/TheBloke/Nous-Capybara-34B-GGUF/resolve/main/nous-capybara-34b.Q5_K_M.gguf", - "id": "capybara-34b", - "object": "model", - "name": "Capybara 200k 34B Q5", - "version": "1.0", - "description": "Nous Capybara 34B is a long context length model that supports 200K tokens.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "USER:\n{prompt}\nASSISTANT:" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "NousResearch, The Bloke", - "tags": ["34B", "Finetuned"], - "size": 24320000000 - }, - "engine": "nitro" - } - \ No newline at end of file + "sources": [ + { + "filename": "nous-capybara-34b.Q5_K_M.gguf", + "url": "https://huggingface.co/TheBloke/Nous-Capybara-34B-GGUF/resolve/main/nous-capybara-34b.Q5_K_M.gguf" + } + ], + "id": "capybara-34b", + "object": "model", + "name": "Capybara 200k 34B Q5", + "version": "1.0", + "description": "Nous Capybara 34B is a long context length model that supports 200K tokens.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "USER:\n{prompt}\nASSISTANT:", + "llama_model_path": "nous-capybara-34b.Q5_K_M.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "NousResearch, The Bloke", + "tags": ["34B", "Finetuned"], + "size": 24320000000 + }, + "engine": "nitro" +} diff --git a/models/codeninja-1.0-7b/model.json b/models/codeninja-1.0-7b/model.json index 98fbac5df..4ffe355d1 100644 --- a/models/codeninja-1.0-7b/model.json +++ b/models/codeninja-1.0-7b/model.json @@ -1,29 +1,33 @@ { - "source_url": "https://huggingface.co/beowolx/CodeNinja-1.0-OpenChat-7B-GGUF/resolve/main/codeninja-1.0-openchat-7b.Q4_K_M.gguf", - "id": "codeninja-1.0-7b", - "object": "model", - "name": "CodeNinja 7B Q4", - "version": "1.0", - "description": "CodeNinja is good for coding tasks and can handle various languages including Python, C, C++, Rust, Java, JavaScript, and more.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:" - 
}, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": ["<|end_of_turn|>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Beowolx", - "tags": ["7B", "Finetuned"], - "size": 4370000000 - }, - "engine": "nitro" - } - \ No newline at end of file + "sources": [ + { + "filename": "codeninja-1.0-openchat-7b.Q4_K_M.gguf", + "url": "https://huggingface.co/beowolx/CodeNinja-1.0-OpenChat-7B-GGUF/resolve/main/codeninja-1.0-openchat-7b.Q4_K_M.gguf" + } + ], + "id": "codeninja-1.0-7b", + "object": "model", + "name": "CodeNinja 7B Q4", + "version": "1.0", + "description": "CodeNinja is good for coding tasks and can handle various languages including Python, C, C++, Rust, Java, JavaScript, and more.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:", + "llama_model_path": "codeninja-1.0-openchat-7b.Q4_K_M.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "Beowolx", + "tags": ["7B", "Finetuned"], + "size": 4370000000 + }, + "engine": "nitro" +} diff --git a/models/config/default-model.json b/models/config/default-model.json index 2263625f4..2fa2df2ee 100644 --- a/models/config/default-model.json +++ b/models/config/default-model.json @@ -2,7 +2,12 @@ "object": "model", "version": 1, "format": "gguf", - "source_url": "N/A", + "sources": [ + { + "url": "N/A", + "filename": "N/A" + } + ], "id": "N/A", "name": "N/A", "created": 0, @@ -10,7 +15,8 @@ "settings": { "ctx_len": 4096, "embedding": false, - "prompt_template": "{system_message}\n### Instruction: {prompt}\n### Response:" + "prompt_template": "{system_message}\n### Instruction: {prompt}\n### Response:", + "llama_model_path": "N/A" }, "parameters": { "temperature": 0.7, diff --git a/models/deepseek-coder-1.3b/model.json b/models/deepseek-coder-1.3b/model.json index 8c454802f..365dbfd2f 100644 --- a/models/deepseek-coder-1.3b/model.json +++ b/models/deepseek-coder-1.3b/model.json @@ -1,29 +1,34 @@ - { - "source_url": "https://huggingface.co/TheBloke/deepseek-coder-1.3b-instruct-GGUF/resolve/main/deepseek-coder-1.3b-instruct.Q8_0.gguf", - "id": "deepseek-coder-1.3b", - "object": "model", - "name": "Deepseek Coder 1.3B Q8", - "version": "1.0", - "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "### Instruction:\n{prompt}\n### Response:" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Deepseek, The Bloke", - "tags": ["Tiny", "Foundational Model"], - "size": 1430000000 - }, - "engine": "nitro" - } + "sources": [ + { + "filename": "deepseek-coder-1.3b-instruct.Q8_0.gguf", + "url": "https://huggingface.co/TheBloke/deepseek-coder-1.3b-instruct-GGUF/resolve/main/deepseek-coder-1.3b-instruct.Q8_0.gguf" + } + ], + "id": "deepseek-coder-1.3b", + "object": "model", + "name": "Deepseek Coder 1.3B Q8", + "version": "1.0", + "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + 
"prompt_template": "### Instruction:\n{prompt}\n### Response:", + "llama_model_path": "deepseek-coder-1.3b-instruct.Q8_0.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "Deepseek, The Bloke", + "tags": ["Tiny", "Foundational Model"], + "size": 1430000000 + }, + "engine": "nitro" +} diff --git a/models/deepseek-coder-34b/model.json b/models/deepseek-coder-34b/model.json index 905a66033..8e17b9563 100644 --- a/models/deepseek-coder-34b/model.json +++ b/models/deepseek-coder-34b/model.json @@ -1,29 +1,34 @@ { - "source_url": "https://huggingface.co/TheBloke/deepseek-coder-33B-instruct-GGUF/resolve/main/deepseek-coder-33b-instruct.Q5_K_M.gguf", - "id": "deepseek-coder-34b", - "object": "model", - "name": "Deepseek Coder 33B Q5", - "version": "1.0", - "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "### Instruction:\n{prompt}\n### Response:" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Deepseek, The Bloke", - "tags": ["34B", "Foundational Model"], - "size": 19940000000 - }, - "engine": "nitro" - } - \ No newline at end of file + "sources": [ + { + "filename": "deepseek-coder-33b-instruct.Q5_K_M.gguf", + "url": "https://huggingface.co/TheBloke/deepseek-coder-33B-instruct-GGUF/resolve/main/deepseek-coder-33b-instruct.Q5_K_M.gguf" + } + ], + "id": "deepseek-coder-34b", + "object": "model", + "name": "Deepseek Coder 33B Q5", + "version": "1.0", + "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "### Instruction:\n{prompt}\n### Response:", + "llama_model_path": "deepseek-coder-33b-instruct.Q5_K_M.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "Deepseek, The Bloke", + "tags": ["34B", "Foundational Model"], + "size": 19940000000 + }, + "engine": "nitro" +} diff --git a/models/dolphin-2.7-mixtral-8x7b/model.json b/models/dolphin-2.7-mixtral-8x7b/model.json index 67bc6737c..7df4fbfbd 100644 --- a/models/dolphin-2.7-mixtral-8x7b/model.json +++ b/models/dolphin-2.7-mixtral-8x7b/model.json @@ -1,28 +1,34 @@ { - "source_url": "https://huggingface.co/TheBloke/dolphin-2.7-mixtral-8x7b-GGUF/resolve/main/dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf", - "id": "dolphin-2.7-mixtral-8x7b", - "object": "model", - "name": "Dolphin 8x7B Q4", - "version": "1.0", - "description": "Dolphin is an uncensored model built on Mixtral-8x7b. 
It is good at programming tasks.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Cognitive Computations, TheBloke", - "tags": ["70B", "Finetuned"], - "size": 26440000000 - }, - "engine": "nitro" - } + "sources": [ + { + "filename": "dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf", + "url": "https://huggingface.co/TheBloke/dolphin-2.7-mixtral-8x7b-GGUF/resolve/main/dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf" + } + ], + "id": "dolphin-2.7-mixtral-8x7b", + "object": "model", + "name": "Dolphin 8x7B Q4", + "version": "1.0", + "description": "Dolphin is an uncensored model built on Mixtral-8x7b. It is good at programming tasks.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "Cognitive Computations, TheBloke", + "tags": ["70B", "Finetuned"], + "size": 26440000000 + }, + "engine": "nitro" +} diff --git a/models/gpt-3.5-turbo-16k-0613/model.json b/models/gpt-3.5-turbo-16k-0613/model.json index 0df4119a5..aa57e1154 100644 --- a/models/gpt-3.5-turbo-16k-0613/model.json +++ b/models/gpt-3.5-turbo-16k-0613/model.json @@ -1,18 +1,20 @@ { - "source_url": "https://openai.com", - "id": "gpt-3.5-turbo-16k-0613", - "object": "model", - "name": "OpenAI GPT 3.5 Turbo 16k 0613", - "version": "1.0", - "description": "OpenAI GPT 3.5 Turbo 16k 0613 model is extremely good", - "format": "api", - "settings": {}, - "parameters": {}, - "metadata": { - "author": "OpenAI", - "tags": ["General", "Big Context Length"] - }, - "engine": "openai", - "state": "ready" + "sources": [ + { + "url": "https://openai.com" + } + ], + "id": "gpt-3.5-turbo-16k-0613", + "object": "model", + "name": "OpenAI GPT 3.5 Turbo 16k 0613", + "version": "1.0", + "description": "OpenAI GPT 3.5 Turbo 16k 0613 model is extremely good", + "format": "api", + "settings": {}, + "parameters": {}, + "metadata": { + "author": "OpenAI", + "tags": ["General", "Big Context Length"] + }, + "engine": "openai" } - \ No newline at end of file diff --git a/models/gpt-3.5-turbo/model.json b/models/gpt-3.5-turbo/model.json index a7dbf3d4e..617f0d056 100644 --- a/models/gpt-3.5-turbo/model.json +++ b/models/gpt-3.5-turbo/model.json @@ -1,18 +1,20 @@ { - "source_url": "https://openai.com", - "id": "gpt-3.5-turbo", - "object": "model", - "name": "OpenAI GPT 3.5 Turbo", - "version": "1.0", - "description": "OpenAI GPT 3.5 Turbo model is extremely good", - "format": "api", - "settings": {}, - "parameters": {}, - "metadata": { - "author": "OpenAI", - "tags": ["General", "Big Context Length"] - }, - "engine": "openai", - "state": "ready" + "sources": [ + { + "url": "https://openai.com" + } + ], + "id": "gpt-3.5-turbo", + "object": "model", + "name": "OpenAI GPT 3.5 Turbo", + "version": "1.0", + "description": "OpenAI GPT 3.5 Turbo model is extremely good", + "format": "api", + "settings": {}, + "parameters": {}, + "metadata": { + "author": "OpenAI", + "tags": 
["General", "Big Context Length"] + }, + "engine": "openai" } - \ No newline at end of file diff --git a/models/gpt-4/model.json b/models/gpt-4/model.json index 3f17e65cb..7aa2338e3 100644 --- a/models/gpt-4/model.json +++ b/models/gpt-4/model.json @@ -1,18 +1,20 @@ { - "source_url": "https://openai.com", - "id": "gpt-4", - "object": "model", - "name": "OpenAI GPT 4", - "version": "1.0", - "description": "OpenAI GPT 4 model is extremely good", - "format": "api", - "settings": {}, - "parameters": {}, - "metadata": { - "author": "OpenAI", - "tags": ["General", "Big Context Length"] - }, - "engine": "openai", - "state": "ready" + "sources": [ + { + "url": "https://openai.com" + } + ], + "id": "gpt-4", + "object": "model", + "name": "OpenAI GPT 4", + "version": "1.0", + "description": "OpenAI GPT 4 model is extremely good", + "format": "api", + "settings": {}, + "parameters": {}, + "metadata": { + "author": "OpenAI", + "tags": ["General", "Big Context Length"] + }, + "engine": "openai" } - \ No newline at end of file diff --git a/models/llama2-chat-70b-q4/model.json b/models/llama2-chat-70b-q4/model.json index 2595ab677..2459e426f 100644 --- a/models/llama2-chat-70b-q4/model.json +++ b/models/llama2-chat-70b-q4/model.json @@ -1,29 +1,34 @@ { - "source_url": "https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGUF/resolve/main/llama-2-70b-chat.Q4_K_M.gguf", - "id": "llama2-chat-70b-q4", - "object": "model", - "name": "Llama 2 Chat 70B Q4", - "version": "1.0", - "description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "[INST] <>\n{system_message}<>\n{prompt}[/INST]" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "MetaAI, The Bloke", - "tags": ["70B", "Foundational Model"], - "size": 43920000000 - }, - "engine": "nitro" - } - \ No newline at end of file + "sources": [ + { + "filename": "llama-2-70b-chat.Q4_K_M.gguf", + "url": "https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGUF/resolve/main/llama-2-70b-chat.Q4_K_M.gguf" + } + ], + "id": "llama2-chat-70b-q4", + "object": "model", + "name": "Llama 2 Chat 70B Q4", + "version": "1.0", + "description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "[INST] <>\n{system_message}<>\n{prompt}[/INST]", + "llama_model_path": "llama-2-70b-chat.Q4_K_M.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "MetaAI, The Bloke", + "tags": ["70B", "Foundational Model"], + "size": 43920000000 + }, + "engine": "nitro" +} diff --git a/models/llama2-chat-7b-q4/model.json b/models/llama2-chat-7b-q4/model.json index 68eab3790..bc9847eef 100644 --- a/models/llama2-chat-7b-q4/model.json +++ b/models/llama2-chat-7b-q4/model.json @@ -1,29 +1,34 @@ { - "source_url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf", - "id": "llama2-chat-7b-q4", - "object": "model", - "name": "Llama 2 Chat 7B Q4", - "version": "1.0", - "description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through 
training on extensive internet data.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "[INST] <>\n{system_message}<>\n{prompt}[/INST]" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "MetaAI, The Bloke", - "tags": ["7B", "Foundational Model"], - "size": 4080000000 - }, - "engine": "nitro" - } - \ No newline at end of file + "sources": [ + { + "filename": "llama-2-7b-chat.Q4_K_M.gguf", + "url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf" + } + ], + "id": "llama2-chat-7b-q4", + "object": "model", + "name": "Llama 2 Chat 7B Q4", + "version": "1.0", + "description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "[INST] <>\n{system_message}<>\n{prompt}[/INST]", + "llama_model_path": "llama-2-7b-chat.Q4_K_M.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "MetaAI, The Bloke", + "tags": ["7B", "Foundational Model"], + "size": 4080000000 + }, + "engine": "nitro" +} diff --git a/models/llava-1.5-13b-q5/model.json b/models/llava-1.5-13b-q5/model.json new file mode 100644 index 000000000..027b8398f --- /dev/null +++ b/models/llava-1.5-13b-q5/model.json @@ -0,0 +1,33 @@ +{ + "sources": [ + { + "filename": "ggml-model-q5_k.gguf", + "url": "https://huggingface.co/mys/ggml_llava-v1.5-13b/resolve/main/ggml-model-q5_k.gguf" + }, + { + "filename": "mmproj-model-f16.gguf", + "url": "https://huggingface.co/mys/ggml_llava-v1.5-13b/resolve/main/mmproj-model-f16.gguf" + } + ], + "id": "llava-1.5-13b-q5", + "object": "model", + "name": "LlaVa 1.5 13B Q5 K", + "version": "1.0", + "description": "LlaVa 1.5 can bring vision understanding to Jan", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n", + "llama_model_path": "ggml-model-q5_k.gguf", + "mmproj": "mmproj-model-f16.gguf" + }, + "parameters": { + "max_tokens": 4096 + }, + "metadata": { + "author": "Mys", + "tags": ["Vision"], + "size": 9850000000 + }, + "engine": "nitro" +} diff --git a/models/llava-1.5-7b-q5/model.json b/models/llava-1.5-7b-q5/model.json new file mode 100644 index 000000000..658b98880 --- /dev/null +++ b/models/llava-1.5-7b-q5/model.json @@ -0,0 +1,33 @@ +{ + "sources": [ + { + "filename": "ggml-model-q5_k.gguf", + "url": "https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/ggml-model-q5_k.gguf" + }, + { + "filename": "mmproj-model-f16.gguf", + "url": "https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/mmproj-model-f16.gguf" + } + ], + "id": "llava-1.5-7b-q5", + "object": "model", + "name": "LlaVa 1.5 7B Q5 K", + "version": "1.0", + "description": "LlaVa 1.5 can bring vision understanding to Jan", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n", + "llama_model_path": "ggml-model-q5_k.gguf", + "mmproj": "mmproj-model-f16.gguf" + }, + "parameters": { + "max_tokens": 4096 + }, + "metadata": { + "author": "Mys", + "tags": ["Vision"], + "size": 5400000000 + }, + "engine": "nitro" +} diff --git a/models/mistral-ins-7b-q4/model.json 
b/models/mistral-ins-7b-q4/model.json index 6db1aa35b..bfdaffa90 100644 --- a/models/mistral-ins-7b-q4/model.json +++ b/models/mistral-ins-7b-q4/model.json @@ -1,30 +1,35 @@ { - "source_url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf", - "id": "mistral-ins-7b-q4", - "object": "model", - "name": "Mistral Instruct 7B Q4", - "version": "1.0", - "description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "[INST] {prompt} [/INST]" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "MistralAI, The Bloke", - "tags": ["Featured", "7B", "Foundational Model"], - "size": 4370000000, - "cover": "https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png" - }, - "engine": "nitro" - } - \ No newline at end of file + "sources": [ + { + "filename": "mistral-7b-instruct-v0.2.Q4_K_M.gguf", + "url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf" + } + ], + "id": "mistral-ins-7b-q4", + "object": "model", + "name": "Mistral Instruct 7B Q4", + "version": "1.0", + "description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "[INST] {prompt} [/INST]", + "llama_model_path": "mistral-7b-instruct-v0.2.Q4_K_M.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "MistralAI, The Bloke", + "tags": ["Featured", "7B", "Foundational Model"], + "size": 4370000000, + "cover": "https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png" + }, + "engine": "nitro" +} diff --git a/models/mixtral-8x7b-instruct/model.json b/models/mixtral-8x7b-instruct/model.json index 31ff2838a..e0a0ee040 100644 --- a/models/mixtral-8x7b-instruct/model.json +++ b/models/mixtral-8x7b-instruct/model.json @@ -1,28 +1,33 @@ { - "source_url": "https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf", - "id": "mixtral-8x7b-instruct", - "object": "model", - "name": "Mixtral 8x7B Instruct Q4", - "version": "1.0", - "description": "The Mixtral-8x7B is a pretrained generative Sparse Mixture of Experts. 
The Mixtral-8x7B outperforms 70B models on most benchmarks.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "[INST] {prompt} [/INST]" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "MistralAI, TheBloke", - "tags": ["70B", "Foundational Model"], - "size": 26440000000 - }, - "engine": "nitro" - } + "sources": [ + { + "filename": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf", + "url": "https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf" + } + ], + "id": "mixtral-8x7b-instruct", + "object": "model", + "name": "Mixtral 8x7B Instruct Q4", + "version": "1.0", + "description": "The Mixtral-8x7B is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms 70B models on most benchmarks.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "[INST] {prompt} [/INST]", + "llama_model_path": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "MistralAI, TheBloke", + "tags": ["70B", "Foundational Model"], + "size": 26440000000 + }, + "engine": "nitro" +} diff --git a/models/noromaid-7b/model.json b/models/noromaid-7b/model.json index fbb7858e1..78d579a64 100644 --- a/models/noromaid-7b/model.json +++ b/models/noromaid-7b/model.json @@ -1,29 +1,34 @@ { - "source_url": "https://huggingface.co/NeverSleep/Noromaid-7b-v0.1.1-GGUF/resolve/main/Noromaid-7b-v0.1.1.q5_k_m.gguf", - "id": "noromaid-7b", - "object": "model", - "name": "Noromaid 7B Q5", - "version": "1.0", - "description": "The Noromaid 7b model is designed for role-playing with human-like behavior.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "### Instruction:{prompt}\n### Response:" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "NeverSleep", - "tags": ["7B", "Merged"], - "size": 4370000000 - }, - "engine": "nitro" - } - \ No newline at end of file + "sources": [ + { + "filename": "Noromaid-7b-v0.1.1.q5_k_m.gguf", + "url": "https://huggingface.co/NeverSleep/Noromaid-7b-v0.1.1-GGUF/resolve/main/Noromaid-7b-v0.1.1.q5_k_m.gguf" + } + ], + "id": "noromaid-7b", + "object": "model", + "name": "Noromaid 7B Q5", + "version": "1.0", + "description": "The Noromaid 7b model is designed for role-playing with human-like behavior.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "### Instruction:{prompt}\n### Response:", + "llama_model_path": "Noromaid-7b-v0.1.1.q5_k_m.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "NeverSleep", + "tags": ["7B", "Merged"], + "size": 4370000000 + }, + "engine": "nitro" +} diff --git a/models/openchat-3.5-7b/model.json b/models/openchat-3.5-7b/model.json index e4b72f9c6..294f7d269 100644 --- a/models/openchat-3.5-7b/model.json +++ b/models/openchat-3.5-7b/model.json @@ -1,28 +1,34 @@ { - "source_url": "https://huggingface.co/TheBloke/openchat-3.5-1210-GGUF/resolve/main/openchat-3.5-1210.Q4_K_M.gguf", - "id": 
"openchat-3.5-7b", - "object": "model", - "name": "Openchat-3.5 7B Q4", - "version": "1.0", - "description": "The performance of this open-source model surpasses that of ChatGPT-3.5 and Grok-1 across various benchmarks.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": ["<|end_of_turn|>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Openchat", - "tags": ["Recommended", "7B", "Finetuned"], - "size": 4370000000 - }, - "engine": "nitro" - } + "sources": [ + { + "filename": "openchat-3.5-1210.Q4_K_M.gguf", + "url": "https://huggingface.co/TheBloke/openchat-3.5-1210-GGUF/resolve/main/openchat-3.5-1210.Q4_K_M.gguf" + } + ], + "id": "openchat-3.5-7b", + "object": "model", + "name": "Openchat-3.5 7B Q4", + "version": "1.0", + "description": "The performance of this open-source model surpasses that of ChatGPT-3.5 and Grok-1 across various benchmarks.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:", + "llama_model_path": "openchat-3.5-1210.Q4_K_M.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "stop": ["<|end_of_turn|>"], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "Openchat", + "tags": ["Recommended", "7B", "Finetuned"], + "size": 4370000000 + }, + "engine": "nitro" +} diff --git a/models/openhermes-neural-7b/model.json b/models/openhermes-neural-7b/model.json index ca3e88502..87e1df143 100644 --- a/models/openhermes-neural-7b/model.json +++ b/models/openhermes-neural-7b/model.json @@ -1,29 +1,34 @@ { - "source_url": "https://huggingface.co/janhq/openhermes-2.5-neural-chat-v3-3-slerp-GGUF/resolve/main/openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf", - "id": "openhermes-neural-7b", - "object": "model", - "name": "OpenHermes Neural 7B Q4", - "version": "1.0", - "description": "OpenHermes Neural is a merged model using the TIES method. It performs well in various benchmarks.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Intel, Jan", - "tags": ["7B", "Merged", "Featured"], - "size": 4370000000, - "cover": "https://raw.githubusercontent.com/janhq/jan/main/models/openhermes-neural-7b/cover.png" - }, - "engine": "nitro" - } + "sources": [ + { + "filename": "openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf", + "url": "https://huggingface.co/janhq/openhermes-2.5-neural-chat-v3-3-slerp-GGUF/resolve/main/openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf" + } + ], + "id": "openhermes-neural-7b", + "object": "model", + "name": "OpenHermes Neural 7B Q4", + "version": "1.0", + "description": "OpenHermes Neural is a merged model using the TIES method. 
It performs well in various benchmarks.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "Intel, Jan", + "tags": ["7B", "Merged", "Featured"], + "size": 4370000000, + "cover": "https://raw.githubusercontent.com/janhq/jan/main/models/openhermes-neural-7b/cover.png" + }, + "engine": "nitro" +} diff --git a/models/phi-2-3b/model.json b/models/phi-2-3b/model.json index 97ff369e7..e452fdb55 100644 --- a/models/phi-2-3b/model.json +++ b/models/phi-2-3b/model.json @@ -1,29 +1,34 @@ { - "source_url": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q8_0.gguf", - "id": "phi-2-3b", - "object": "model", - "name": "Phi-2 3B Q8", - "version": "1.0", - "description": "Phi-2 is a 2.7B model, excelling in common sense and logical reasoning benchmarks, trained with synthetic texts and filtered websites.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "Intruct:\n{prompt}\nOutput:" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Microsoft", - "tags": ["3B","Foundational Model"], - "size": 2960000000 - }, - "engine": "nitro" - } - \ No newline at end of file + "sources": [ + { + "filename": "phi-2.Q8_0.gguf", + "url": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q8_0.gguf" + } + ], + "id": "phi-2-3b", + "object": "model", + "name": "Phi-2 3B Q8", + "version": "1.0", + "description": "Phi-2 is a 2.7B model, excelling in common sense and logical reasoning benchmarks, trained with synthetic texts and filtered websites.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "Intruct:\n{prompt}\nOutput:", + "llama_model_path": "phi-2.Q8_0.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "Microsoft", + "tags": ["3B", "Foundational Model"], + "size": 2960000000 + }, + "engine": "nitro" +} diff --git a/models/phind-34b/model.json b/models/phind-34b/model.json index 7fc77ed71..60309cb9b 100644 --- a/models/phind-34b/model.json +++ b/models/phind-34b/model.json @@ -1,29 +1,34 @@ { - "source_url": "https://huggingface.co/TheBloke/Phind-CodeLlama-34B-v2-GGUF/resolve/main/phind-codellama-34b-v2.Q5_K_M.gguf", - "id": "phind-34b", - "object": "model", - "name": "Phind 34B Q5", - "version": "1.0", - "description": "Phind 34B is fine-tuned on 1.5B tokens of high-quality programming data. 
This multi-lingual model excels in various programming languages and is designed to be steerable and user-friendly.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Phind, The Bloke", - "tags": ["34B", "Finetuned"], - "size": 20220000000 - }, - "engine": "nitro" - } - \ No newline at end of file + "sources": [ + { + "filename": "phind-codellama-34b-v2.Q5_K_M.gguf", + "url": "https://huggingface.co/TheBloke/Phind-CodeLlama-34B-v2-GGUF/resolve/main/phind-codellama-34b-v2.Q5_K_M.gguf" + } + ], + "id": "phind-34b", + "object": "model", + "name": "Phind 34B Q5", + "version": "1.0", + "description": "Phind 34B is fine-tuned on 1.5B tokens of high-quality programming data. This multi-lingual model excels in various programming languages and is designed to be steerable and user-friendly.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant", + "llama_model_path": "phind-codellama-34b-v2.Q5_K_M.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "Phind, The Bloke", + "tags": ["34B", "Finetuned"], + "size": 20220000000 + }, + "engine": "nitro" +} diff --git a/models/solar-10.7b-slerp/model.json b/models/solar-10.7b-slerp/model.json index 9177fa013..8e62fa25b 100644 --- a/models/solar-10.7b-slerp/model.json +++ b/models/solar-10.7b-slerp/model.json @@ -1,29 +1,33 @@ { - "source_url": "https://huggingface.co/janhq/Solar-10.7B-SLERP-GGUF/resolve/main/solar-10.7b-slerp.Q4_K_M.gguf", - "id": "solar-10.7b-slerp", - "object": "model", - "name": "Solar Slerp 10.7B Q4", - "version": "1.0", - "description": "This model uses the Slerp merge method from SOLAR Instruct and Pandora-v1", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "### User: {prompt}\n### Assistant:" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Jan", - "tags": ["13B","Finetuned"], - "size": 6360000000 - }, - "engine": "nitro" - } - \ No newline at end of file + "sources": [ + { + "filename": "solar-10.7b-slerp.Q4_K_M.gguf", + "url": "https://huggingface.co/janhq/Solar-10.7B-SLERP-GGUF/resolve/main/solar-10.7b-slerp.Q4_K_M.gguf" + } + ], + "id": "solar-10.7b-slerp", + "object": "model", + "name": "Solar Slerp 10.7B Q4", + "version": "1.0", + "description": "This model uses the Slerp merge method from SOLAR Instruct and Pandora-v1", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "### User: {prompt}\n### Assistant:", + "llama_model_path": "solar-10.7b-slerp.Q4_K_M.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "Jan", + "tags": ["13B", "Finetuned"], + "size": 6360000000 + }, + "engine": "nitro" +} diff --git a/models/starling-7b/model.json b/models/starling-7b/model.json index 1a6d7e55c..eaa540bd9 100644 --- a/models/starling-7b/model.json 
+++ b/models/starling-7b/model.json @@ -1,29 +1,34 @@ { - "source_url": "https://huggingface.co/TheBloke/Starling-LM-7B-alpha-GGUF/resolve/main/starling-lm-7b-alpha.Q4_K_M.gguf", - "id": "starling-7b", - "object": "model", - "name": "Starling alpha 7B Q4", - "version": "1.0", - "description": "Starling 7B, an upgrade of Openchat 3.5 using RLAIF, is really good at various benchmarks, especially with GPT-4 judging its performance.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "GPT4 User: {prompt}<|end_of_turn|>GPT4 Assistant:" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": ["<|end_of_turn|>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Berkeley-nest, The Bloke", - "tags": ["7B","Finetuned"], - "size": 4370000000 - }, - "engine": "nitro" - } - \ No newline at end of file + "sources": [ + { + "filename": "starling-lm-7b-alpha.Q4_K_M.gguf", + "url": "https://huggingface.co/TheBloke/Starling-LM-7B-alpha-GGUF/resolve/main/starling-lm-7b-alpha.Q4_K_M.gguf" + } + ], + "id": "starling-7b", + "object": "model", + "name": "Starling alpha 7B Q4", + "version": "1.0", + "description": "Starling 7B, an upgrade of Openchat 3.5 using RLAIF, is really good at various benchmarks, especially with GPT-4 judging its performance.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "GPT4 User: {prompt}<|end_of_turn|>GPT4 Assistant:", + "llama_model_path": "starling-lm-7b-alpha.Q4_K_M.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "stop": ["<|end_of_turn|>"], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "Berkeley-nest, The Bloke", + "tags": ["7B", "Finetuned"], + "size": 4370000000 + }, + "engine": "nitro" +} diff --git a/models/stealth-v1.2-7b/model.json b/models/stealth-v1.2-7b/model.json index 92bfe46e1..235cbbb88 100644 --- a/models/stealth-v1.2-7b/model.json +++ b/models/stealth-v1.2-7b/model.json @@ -1,32 +1,33 @@ { - "source_url": "https://huggingface.co/janhq/stealth-v1.3-GGUF/resolve/main/stealth-v1.3.Q4_K_M.gguf", - "id": "stealth-v1.2-7b", - "object": "model", - "name": "Stealth 7B Q4", - "version": "1.0", - "description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Jan", - "tags": [ - "7B", - "Finetuned", - "Featured" - ], - "size": 4370000000 - }, - "engine": "nitro" - } \ No newline at end of file + "sources": [ + { + "filename": "stealth-v1.3.Q4_K_M.gguf", + "url": "https://huggingface.co/janhq/stealth-v1.3-GGUF/resolve/main/stealth-v1.3.Q4_K_M.gguf" + } + ], + "id": "stealth-v1.2-7b", + "object": "model", + "name": "Stealth 7B Q4", + "version": "1.0", + "description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "stealth-v1.3.Q4_K_M.gguf" + }, + "parameters": { 
+ "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "Jan", + "tags": ["7B", "Finetuned", "Featured"], + "size": 4370000000 + }, + "engine": "nitro" +} diff --git a/models/tinyllama-1.1b/model.json b/models/tinyllama-1.1b/model.json index 641511569..6a9187fa5 100644 --- a/models/tinyllama-1.1b/model.json +++ b/models/tinyllama-1.1b/model.json @@ -1,5 +1,10 @@ { - "source_url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf", + "sources": [ + { + "filename": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf", + "url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf" + } + ], "id": "tinyllama-1.1b", "object": "model", "name": "TinyLlama Chat 1.1B Q4", @@ -7,8 +12,9 @@ "description": "TinyLlama is a tiny model with only 1.1B. It's a good model for less powerful computers.", "format": "gguf", "settings": { - "ctx_len": 2048, - "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>" + "ctx_len": 4096, + "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>", + "llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf" }, "parameters": { "temperature": 0.7, @@ -20,9 +26,9 @@ "presence_penalty": 0 }, "metadata": { - "author": "TinyLlama", - "tags": ["Tiny", "Foundation Model"], - "size": 669000000 + "author": "TinyLlama", + "tags": ["Tiny", "Foundation Model"], + "size": 669000000 }, "engine": "nitro" -} \ No newline at end of file +} diff --git a/models/trinity-v1.2-7b/model.json b/models/trinity-v1.2-7b/model.json index ce5e7a4cf..2dda120e6 100644 --- a/models/trinity-v1.2-7b/model.json +++ b/models/trinity-v1.2-7b/model.json @@ -1,29 +1,34 @@ { - "source_url": "https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf", - "id": "trinity-v1.2-7b", - "object": "model", - "name": "Trinity-v1.2 7B Q4", - "version": "1.0", - "description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Jan", - "tags": ["7B", "Merged", "Featured"], - "size": 4370000000, - "cover": "https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png" - }, - "engine": "nitro" - } \ No newline at end of file + "sources": [ + { + "filename": "trinity-v1.2.Q4_K_M.gguf", + "url": "https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf" + } + ], + "id": "trinity-v1.2-7b", + "object": "model", + "name": "Trinity-v1.2 7B Q4", + "version": "1.0", + "description": "Trinity is an experimental model merge using the Slerp method. 
Recommended for daily assistance purposes.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "trinity-v1.2.Q4_K_M.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "Jan", + "tags": ["7B", "Merged", "Featured"], + "size": 4370000000, + "cover": "https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png" + }, + "engine": "nitro" +} diff --git a/models/tulu-2-70b/model.json b/models/tulu-2-70b/model.json index ae95b870d..06b96e50d 100644 --- a/models/tulu-2-70b/model.json +++ b/models/tulu-2-70b/model.json @@ -1,28 +1,33 @@ { - "source_url": "https://huggingface.co/TheBloke/tulu-2-dpo-70B-GGUF/resolve/main/tulu-2-dpo-70b.Q4_K_M.gguf", - "id": "tulu-2-70b", - "object": "model", - "name": "Tulu 2 70B Q4", - "version": "1.0", - "description": "Tulu 70B is a strong alternative to Llama 2 70b Chat to act as helpful assistants.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "<|user|>\n{prompt}\n<|assistant|>" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Lizpreciatior, The Bloke", - "tags": ["70B", "Finetuned"], - "size": 41400000000 - }, - "engine": "nitro" - } \ No newline at end of file + "sources": [ + { + "filename": "tulu-2-dpo-70b.Q4_K_M.gguf", + "url": "https://huggingface.co/TheBloke/tulu-2-dpo-70B-GGUF/resolve/main/tulu-2-dpo-70b.Q4_K_M.gguf" + } + ], + "id": "tulu-2-70b", + "object": "model", + "name": "Tulu 2 70B Q4", + "version": "1.0", + "description": "Tulu 70B is a strong alternative to Llama 2 70b Chat to act as helpful assistants.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "<|user|>\n{prompt}\n<|assistant|>", + "llama_model_path": "tulu-2-dpo-70b.Q4_K_M.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "Lizpreciatior, The Bloke", + "tags": ["70B", "Finetuned"], + "size": 41400000000 + }, + "engine": "nitro" +} diff --git a/models/wizardcoder-13b/model.json b/models/wizardcoder-13b/model.json index 1605ed362..051c739a0 100644 --- a/models/wizardcoder-13b/model.json +++ b/models/wizardcoder-13b/model.json @@ -1,29 +1,34 @@ { - "source_url": "https://huggingface.co/TheBloke/WizardCoder-Python-13B-V1.0-GGUF/resolve/main/wizardcoder-python-13b-v1.0.Q5_K_M.gguf", - "id": "wizardcoder-13b", - "object": "model", - "name": "Wizard Coder Python 13B Q5", - "version": "1.0", - "description": "WizardCoder 13B is a Python coding model. 
This model demonstrate high proficiency in specific domains like coding and mathematics.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "### Instruction:\n{prompt}\n### Response:" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "WizardLM, The Bloke", - "tags": ["Recommended", "13B", "Finetuned"], - "size": 7870000000 - }, - "engine": "nitro" - } - \ No newline at end of file + "sources": [ + { + "filename": "wizardcoder-python-13b-v1.0.Q5_K_M.gguf", + "url": "https://huggingface.co/TheBloke/WizardCoder-Python-13B-V1.0-GGUF/resolve/main/wizardcoder-python-13b-v1.0.Q5_K_M.gguf" + } + ], + "id": "wizardcoder-13b", + "object": "model", + "name": "Wizard Coder Python 13B Q5", + "version": "1.0", + "description": "WizardCoder 13B is a Python coding model. This model demonstrates high proficiency in specific domains like coding and mathematics.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "### Instruction:\n{prompt}\n### Response:", + "llama_model_path": "wizardcoder-python-13b-v1.0.Q5_K_M.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "WizardLM, The Bloke", + "tags": ["Recommended", "13B", "Finetuned"], + "size": 7870000000 + }, + "engine": "nitro" +} diff --git a/models/yarn-mistral-7b/model.json b/models/yarn-mistral-7b/model.json index 2676fbb58..ee6de1319 100644 --- a/models/yarn-mistral-7b/model.json +++ b/models/yarn-mistral-7b/model.json @@ -1,29 +1,31 @@ { - "source_url": "https://huggingface.co/TheBloke/Yarn-Mistral-7B-128k-GGUF/resolve/main/yarn-mistral-7b-128k.Q4_K_M.gguf", - "id": "yarn-mistral-7b", - "object": "model", - "name": "Yarn Mistral 7B Q4", - "version": "1.0", - "description": "Yarn Mistral 7B is a language model for long context and supports a 128k token context window.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "{prompt}" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "NousResearch, The Bloke", - "tags": ["7B","Finetuned"], - "size": 4370000000 - }, - "engine": "nitro" - } - \ No newline at end of file + "sources": [ + { + "url": "https://huggingface.co/TheBloke/Yarn-Mistral-7B-128k-GGUF/resolve/main/yarn-mistral-7b-128k.Q4_K_M.gguf" + } + ], + "id": "yarn-mistral-7b", + "object": "model", + "name": "Yarn Mistral 7B Q4", + "version": "1.0", + "description": "Yarn Mistral 7B is a language model for long context and supports a 128k token context window.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "{prompt}" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "NousResearch, The Bloke", + "tags": ["7B", "Finetuned"], + "size": 4370000000 + }, + "engine": "nitro" +} diff --git a/models/yi-34b/model.json b/models/yi-34b/model.json index 8ff23aaa0..3697a9e22 100644 --- a/models/yi-34b/model.json +++ b/models/yi-34b/model.json @@ -1,29 +1,34 @@ { - "source_url": "https://huggingface.co/TheBloke/Yi-34B-Chat-GGUF/resolve/main/yi-34b-chat.Q5_K_M.gguf", - "id": "yi-34b",
- "object": "model", - "name": "Yi 34B Q5", - "version": "1.0", - "description": "Yi-34B, a specialized chat model, is known for its diverse and creative responses and excels across various NLP tasks and benchmarks.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "01-ai, The Bloke", - "tags": ["34B", "Foundational Model"], - "size": 20660000000 - }, - "engine": "nitro" - } - \ No newline at end of file + "sources": [ + { + "filename": "yi-34b-chat.Q5_K_M.gguf", + "url": "https://huggingface.co/TheBloke/Yi-34B-Chat-GGUF/resolve/main/yi-34b-chat.Q5_K_M.gguf" + } + ], + "id": "yi-34b", + "object": "model", + "name": "Yi 34B Q5", + "version": "1.0", + "description": "Yi-34B, a specialized chat model, is known for its diverse and creative responses and excels across various NLP tasks and benchmarks.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "yi-34b-chat.Q5_K_M.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "01-ai, The Bloke", + "tags": ["34B", "Foundational Model"], + "size": 20660000000 + }, + "engine": "nitro" +} diff --git a/server/package.json b/server/package.json index 9495a0d65..f61730da4 100644 --- a/server/package.json +++ b/server/package.json @@ -26,6 +26,8 @@ "dotenv": "^16.3.1", "fastify": "^4.24.3", "request": "^2.88.2", + "fetch-retry": "^5.0.6", + "tcp-port-used": "^1.0.2", "request-progress": "^3.0.0" }, "devDependencies": { @@ -35,6 +37,7 @@ "@typescript-eslint/parser": "^6.7.3", "eslint-plugin-react": "^7.33.2", "run-script-os": "^1.1.6", + "@types/tcp-port-used": "^1.0.4", "typescript": "^5.2.2" } } diff --git a/uikit/package.json b/uikit/package.json index 43e73dcf2..66f05840b 100644 --- a/uikit/package.json +++ b/uikit/package.json @@ -18,6 +18,7 @@ }, "dependencies": { "@radix-ui/react-avatar": "^1.0.4", + "@radix-ui/react-checkbox": "^1.0.4", "@radix-ui/react-context": "^1.0.1", "@radix-ui/react-dialog": "^1.0.5", "@radix-ui/react-icons": "^1.3.0", diff --git a/uikit/src/button/styles.scss b/uikit/src/button/styles.scss index 74585ed1e..003df5b4d 100644 --- a/uikit/src/button/styles.scss +++ b/uikit/src/button/styles.scss @@ -9,7 +9,7 @@ } &-secondary-blue { - @apply bg-blue-200 text-blue-600 hover:bg-blue-500/50; + @apply bg-blue-200 text-blue-600 hover:bg-blue-300/50 dark:hover:bg-blue-200/80; } &-danger { @@ -17,7 +17,7 @@ } &-secondary-danger { - @apply bg-red-200 text-red-600 hover:bg-red-500/50; + @apply bg-red-200 text-red-600 hover:bg-red-300/50 dark:hover:bg-red-200/80; } &-outline { @@ -67,14 +67,18 @@ [type='submit'] { &.btn-primary { @apply bg-primary hover:bg-primary/90; + @apply disabled:pointer-events-none disabled:bg-zinc-100 disabled:text-zinc-400; } &.btn-secondary { @apply bg-secondary hover:bg-secondary/80; + @apply disabled:pointer-events-none disabled:bg-zinc-100 disabled:text-zinc-400; } &.btn-secondary-blue { @apply bg-blue-200 text-blue-900 hover:bg-blue-200/80; + @apply 
disabled:pointer-events-none disabled:bg-zinc-100 disabled:text-zinc-400; } &.btn-danger { @apply bg-danger hover:bg-danger/90; + @apply disabled:pointer-events-none disabled:bg-zinc-100 disabled:text-zinc-400; } } diff --git a/uikit/src/checkbox/index.tsx b/uikit/src/checkbox/index.tsx new file mode 100644 index 000000000..1e78aeafb --- /dev/null +++ b/uikit/src/checkbox/index.tsx @@ -0,0 +1,29 @@ +'use client' + +import * as React from 'react' +import * as CheckboxPrimitive from '@radix-ui/react-checkbox' +import { CheckIcon } from '@radix-ui/react-icons' + +import { twMerge } from 'tailwind-merge' + +const Checkbox = React.forwardRef< + React.ElementRef<typeof CheckboxPrimitive.Root>, + React.ComponentPropsWithoutRef<typeof CheckboxPrimitive.Root> +>(({ className, ...props }, ref) => ( + <CheckboxPrimitive.Root ref={ref} className={twMerge('checkbox', className)} {...props}> + <CheckboxPrimitive.Indicator> + <CheckIcon className="checkbox--icon" /> + </CheckboxPrimitive.Indicator> + </CheckboxPrimitive.Root> +)) +Checkbox.displayName = CheckboxPrimitive.Root.displayName + +export { Checkbox } diff --git a/uikit/src/checkbox/styles.scss b/uikit/src/checkbox/styles.scss new file mode 100644 index 000000000..33610f837 --- /dev/null +++ b/uikit/src/checkbox/styles.scss @@ -0,0 +1,7 @@ +.checkbox { + @apply border-border data-[state=checked]:bg-primary h-5 w-5 flex-shrink-0 rounded-md border data-[state=checked]:text-white; + + &--icon { + @apply h-4 w-4; + } +} diff --git a/uikit/src/index.ts b/uikit/src/index.ts index 3d5eaa82a..1b0a26bd1 100644 --- a/uikit/src/index.ts +++ b/uikit/src/index.ts @@ -12,3 +12,4 @@ export * from './command' export * from './textarea' export * from './select' export * from './slider' +export * from './checkbox' diff --git a/uikit/src/main.scss b/uikit/src/main.scss index 546f22811..c1326ba19 100644 --- a/uikit/src/main.scss +++ b/uikit/src/main.scss @@ -16,6 +16,7 @@ @import './textarea/styles.scss'; @import './select/styles.scss'; @import './slider/styles.scss'; +@import './checkbox/styles.scss'; .animate-spin { animation: spin 1s linear infinite; diff --git a/web/containers/CardSidebar/index.tsx b/web/containers/CardSidebar/index.tsx index 552856921..38a8678d9 100644 --- a/web/containers/CardSidebar/index.tsx +++ b/web/containers/CardSidebar/index.tsx @@ -19,6 +19,7 @@ import { activeThreadAtom } from '@/helpers/atoms/Thread.atom' interface Props { children: ReactNode + rightAction?: ReactNode title: string asChild?: boolean hideMoreVerticalAction?: boolean @@ -27,6 +28,7 @@ export default function CardSidebar({ children, title, asChild, + rightAction, hideMoreVerticalAction, }: Props) { const [show, setShow] = useState(true) @@ -48,27 +50,16 @@ export default function CardSidebar({
- {title} -
- {!asChild && ( - <> - {!hideMoreVerticalAction && ( -
setMore(!more)} - > - -
- )} - - )} +
+ {title} +
+
+ {rightAction && rightAction} + {!asChild && ( + <> + {!hideMoreVerticalAction && ( +
setMore(!more)} + > + +
+ )} + + )}
{more && ( diff --git a/web/containers/Checkbox/index.tsx b/web/containers/Checkbox/index.tsx index e8f916d98..a545771b6 100644 --- a/web/containers/Checkbox/index.tsx +++ b/web/containers/Checkbox/index.tsx @@ -9,54 +9,26 @@ import { TooltipTrigger, } from '@janhq/uikit' -import { useAtomValue, useSetAtom } from 'jotai' import { InfoIcon } from 'lucide-react' -import { useActiveModel } from '@/hooks/useActiveModel' -import useUpdateModelParameters from '@/hooks/useUpdateModelParameters' - -import { getConfigurationsData } from '@/utils/componentSettings' -import { toSettingParams } from '@/utils/modelParam' - -import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom' -import { - engineParamsUpdateAtom, - getActiveThreadIdAtom, - getActiveThreadModelParamsAtom, -} from '@/helpers/atoms/Thread.atom' - type Props = { name: string title: string + enabled?: boolean description: string checked: boolean + onValueChanged?: (e: string | number | boolean) => void } -const Checkbox: React.FC = ({ name, title, checked, description }) => { - const { updateModelParameter } = useUpdateModelParameters() - const threadId = useAtomValue(getActiveThreadIdAtom) - - const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom) - - const modelSettingParams = toSettingParams(activeModelParams) - - const engineParams = getConfigurationsData(modelSettingParams) - - const setEngineParamsUpdate = useSetAtom(engineParamsUpdateAtom) - - const serverEnabled = useAtomValue(serverEnabledAtom) - - const { stopModel } = useActiveModel() - +const Checkbox: React.FC = ({ + title, + checked, + enabled = true, + description, + onValueChanged, +}) => { const onCheckedChange = (checked: boolean) => { - if (!threadId) return - if (engineParams.some((x) => x.name.includes(name))) { - setEngineParamsUpdate(true) - stopModel() - } else { - setEngineParamsUpdate(false) - } - updateModelParameter(threadId, name, checked) + onValueChanged?.(checked) } return ( @@ -80,7 +52,7 @@ const Checkbox: React.FC = ({ name, title, checked, description }) => {
) diff --git a/web/containers/DropdownListSidebar/index.tsx b/web/containers/DropdownListSidebar/index.tsx index eb867f54e..140a1aba1 100644 --- a/web/containers/DropdownListSidebar/index.tsx +++ b/web/containers/DropdownListSidebar/index.tsx @@ -26,6 +26,8 @@ import { useMainViewState } from '@/hooks/useMainViewState' import useRecommendedModel from '@/hooks/useRecommendedModel' +import useUpdateModelParameters from '@/hooks/useUpdateModelParameters' + import { toGibibytes } from '@/utils/converter' import ModelLabel from '../ModelLabel' @@ -34,10 +36,8 @@ import OpenAiKeyInput from '../OpenAiKeyInput' import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom' import { - ModelParams, activeThreadAtom, setThreadModelParamsAtom, - threadStatesAtom, } from '@/helpers/atoms/Thread.atom' export const selectedModelAtom = atom(undefined) @@ -49,7 +49,6 @@ const DropdownListSidebar = ({ strictedThread?: boolean }) => { const activeThread = useAtomValue(activeThreadAtom) - const threadStates = useAtomValue(threadStatesAtom) const [selectedModel, setSelectedModel] = useAtom(selectedModelAtom) const setThreadModelParams = useSetAtom(setThreadModelParamsAtom) @@ -58,15 +57,7 @@ const DropdownListSidebar = ({ const { setMainViewState } = useMainViewState() const [loader, setLoader] = useState(0) const { recommendedModel, downloadedModels } = useRecommendedModel() - - /** - * Default value for max_tokens and ctx_len - * Its to avoid OOM issue since a model can set a big number for these settings - */ - const defaultValue = (value?: number) => { - if (value && value < 4096) return value - return 4096 - } + const { updateModelParameter } = useUpdateModelParameters() useEffect(() => { if (!activeThread) return @@ -78,31 +69,7 @@ const DropdownListSidebar = ({ model = recommendedModel } setSelectedModel(model) - const finishInit = threadStates[activeThread.id].isFinishInit ?? 
true - if (finishInit) return - const modelParams: ModelParams = { - ...model?.parameters, - ...model?.settings, - /** - * This is to set default value for these settings instead of maximum value - * Should only apply when model.json has these settings - */ - ...(model?.parameters.max_tokens && { - max_tokens: defaultValue(model?.parameters.max_tokens), - }), - ...(model?.settings.ctx_len && { - ctx_len: defaultValue(model?.settings.ctx_len), - }), - } - setThreadModelParams(activeThread.id, modelParams) - }, [ - recommendedModel, - activeThread, - threadStates, - downloadedModels, - setThreadModelParams, - setSelectedModel, - ]) + }, [recommendedModel, activeThread, downloadedModels, setSelectedModel]) // This is fake loader please fix this when we have realtime percentage when load model useEffect(() => { @@ -144,7 +111,16 @@ const DropdownListSidebar = ({ ...model?.parameters, ...model?.settings, } + // Update model paramter to the thread state setThreadModelParams(activeThread.id, modelParams) + + // Update model parameter to the thread file + if (model) + updateModelParameter(activeThread.id, { + params: modelParams, + modelId: model.id, + engine: model.engine, + }) } }, [ @@ -154,6 +130,7 @@ const DropdownListSidebar = ({ setSelectedModel, setServerEnabled, setThreadModelParams, + updateModelParameter, ] ) diff --git a/web/containers/Layout/index.tsx b/web/containers/Layout/index.tsx index 033038bad..77a1fe971 100644 --- a/web/containers/Layout/index.tsx +++ b/web/containers/Layout/index.tsx @@ -12,7 +12,8 @@ import TopBar from '@/containers/Layout/TopBar' import { MainViewState } from '@/constants/screens' import { useMainViewState } from '@/hooks/useMainViewState' -import { SUCCESS_SET_NEW_DESTINATION } from '@/hooks/useVaultDirectory' + +import { SUCCESS_SET_NEW_DESTINATION } from '@/screens/Settings/Advanced/DataFolder' const BaseLayout = (props: PropsWithChildren) => { const { children } = props @@ -27,7 +28,6 @@ const BaseLayout = (props: PropsWithChildren) => { useEffect(() => { if (localStorage.getItem(SUCCESS_SET_NEW_DESTINATION) === 'true') { setMainViewState(MainViewState.Settings) - localStorage.removeItem(SUCCESS_SET_NEW_DESTINATION) } }, [setMainViewState]) diff --git a/web/containers/Loader/GenerateResponse.tsx b/web/containers/Loader/GenerateResponse.tsx new file mode 100644 index 000000000..457c44987 --- /dev/null +++ b/web/containers/Loader/GenerateResponse.tsx @@ -0,0 +1,39 @@ +import React, { useEffect, useState } from 'react' + +export default function GenerateResponse() { + const [loader, setLoader] = useState(0) + + // This is fake loader please fix this when we have realtime percentage when load model + useEffect(() => { + if (loader === 24) { + setTimeout(() => { + setLoader(loader + 1) + }, 250) + } else if (loader === 50) { + setTimeout(() => { + setLoader(loader + 1) + }, 250) + } else if (loader === 78) { + setTimeout(() => { + setLoader(loader + 1) + }, 250) + } else if (loader === 85) { + setLoader(85) + } else { + setLoader(loader + 1) + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [loader]) + + return ( +
+
+
+ Generating response... +
+
+ ) +} diff --git a/web/containers/ModelConfigInput/index.tsx b/web/containers/ModelConfigInput/index.tsx index e409fd424..d573a0bf9 100644 --- a/web/containers/ModelConfigInput/index.tsx +++ b/web/containers/ModelConfigInput/index.tsx @@ -7,65 +7,26 @@ import { TooltipTrigger, } from '@janhq/uikit' -import { useAtomValue, useSetAtom } from 'jotai' - import { InfoIcon } from 'lucide-react' -import { useActiveModel } from '@/hooks/useActiveModel' -import useUpdateModelParameters from '@/hooks/useUpdateModelParameters' - -import { getConfigurationsData } from '@/utils/componentSettings' - -import { toSettingParams } from '@/utils/modelParam' - -import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom' -import { - engineParamsUpdateAtom, - getActiveThreadIdAtom, - getActiveThreadModelParamsAtom, -} from '@/helpers/atoms/Thread.atom' - type Props = { title: string + enabled?: boolean name: string description: string placeholder: string value: string + onValueChanged?: (e: string | number | boolean) => void } const ModelConfigInput: React.FC = ({ title, - name, + enabled = true, value, description, placeholder, + onValueChanged, }) => { - const { updateModelParameter } = useUpdateModelParameters() - const threadId = useAtomValue(getActiveThreadIdAtom) - - const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom) - - const modelSettingParams = toSettingParams(activeModelParams) - - const engineParams = getConfigurationsData(modelSettingParams) - - const setEngineParamsUpdate = useSetAtom(engineParamsUpdateAtom) - - const { stopModel } = useActiveModel() - - const serverEnabled = useAtomValue(serverEnabledAtom) - - const onValueChanged = (e: React.ChangeEvent) => { - if (!threadId) return - if (engineParams.some((x) => x.name.includes(name))) { - setEngineParamsUpdate(true) - stopModel() - } else { - setEngineParamsUpdate(false) - } - updateModelParameter(threadId, name, e.target.value) - } - return (
@@ -86,9 +47,9 @@ const ModelConfigInput: React.FC<Props> = ({