diff --git a/.github/workflows/template-noti-discord-and-update-url-readme.yml b/.github/workflows/template-noti-discord-and-update-url-readme.yml index a53c20be5..d799dacfa 100644 --- a/.github/workflows/template-noti-discord-and-update-url-readme.yml +++ b/.github/workflows/template-noti-discord-and-update-url-readme.yml @@ -47,11 +47,11 @@ jobs: with: args: | Jan App ${{ inputs.build_reason }} build artifact version {{ VERSION }}: - - Windows: https://delta.jan.ai/nightly/jan-win-x64-{{ VERSION }}.exe - - macOS Intel: https://delta.jan.ai/nightly/jan-mac-x64-{{ VERSION }}.dmg - - macOS Apple Silicon: https://delta.jan.ai/nightly/jan-mac-arm64-{{ VERSION }}.dmg - - Linux Deb: https://delta.jan.ai/nightly/jan-linux-amd64-{{ VERSION }}.deb - - Linux AppImage: https://delta.jan.ai/nightly/jan-linux-x86_64-{{ VERSION }}.AppImage + - Windows: https://delta.jan.ai/nightly/jan-nightly-win-x64-{{ VERSION }}.exe + - macOS Intel: https://delta.jan.ai/nightly/jan-nightly-mac-x64-{{ VERSION }}.dmg + - macOS Apple Silicon: https://delta.jan.ai/nightly/jan-nightly-mac-arm64-{{ VERSION }}.dmg + - Linux Deb: https://delta.jan.ai/nightly/jan-nightly-linux-amd64-{{ VERSION }}.deb + - Linux AppImage: https://delta.jan.ai/nightly/jan-nightly-linux-x86_64-{{ VERSION }}.AppImage - Github action run: https://github.com/janhq/jan/actions/runs/{{ GITHUB_RUN_ID }} env: DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index f28d152d9..ab815678a 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,4 @@ coverage .yarnrc test_results.html *.tsbuildinfo +electron/shared/** diff --git a/core/src/browser/core.test.ts b/core/src/browser/core.test.ts index f38cc0b40..720ea9dcf 100644 --- a/core/src/browser/core.test.ts +++ b/core/src/browser/core.test.ts @@ -3,7 +3,6 @@ import { joinPath } from './core' import { openFileExplorer } from './core' import { getJanDataFolderPath } from './core' import { abortDownload } from './core' -import { getFileSize } from './core' import { executeOnMain } from './core' describe('test core apis', () => { @@ -66,18 +65,6 @@ describe('test core apis', () => { expect(result).toBe('aborted') }) - it('should get file size', async () => { - const url = 'http://example.com/file' - globalThis.core = { - api: { - getFileSize: jest.fn().mockResolvedValue(1024), - }, - } - const result = await getFileSize(url) - expect(globalThis.core.api.getFileSize).toHaveBeenCalledWith(url) - expect(result).toBe(1024) - }) - it('should execute function on main process', async () => { const extension = 'testExtension' const method = 'testMethod' diff --git a/core/src/browser/core.ts b/core/src/browser/core.ts index b19e0b339..7058fc172 100644 --- a/core/src/browser/core.ts +++ b/core/src/browser/core.ts @@ -28,15 +28,6 @@ const downloadFile: (downloadRequest: DownloadRequest, network?: NetworkConfig) network ) => globalThis.core?.api?.downloadFile(downloadRequest, network) -/** - * Get unit in bytes for a remote file. - * - * @param url - The url of the file. - * @returns {Promise} - A promise that resolves with the file size. - */ -const getFileSize: (url: string) => Promise = (url: string) => - globalThis.core.api?.getFileSize(url) - /** * Aborts the download of a specific file. * @param {string} fileName - The name of the file whose download is to be aborted. 
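With `getFileSize` removed from the core browser API above, a caller that still needs the size of a remote file can issue a plain HEAD request itself. A minimal sketch, not part of this PR (`getRemoteFileSize` is a hypothetical helper name), mirroring the `-1` fallback of the deleted Node-side implementation:

```typescript
// Hypothetical stand-in for the removed core.api.getFileSize route:
// ask the server for headers only and read Content-Length.
const getRemoteFileSize = async (url: string): Promise<number> => {
  const response = await fetch(url, { method: 'HEAD' })
  const length = response.headers.get('content-length')
  // Keep the old contract: -1 when the server does not report a size.
  return length !== null ? Number(length) : -1
}
```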
@@ -167,7 +158,6 @@ export {
   getUserHomePath,
   systemInformation,
   showToast,
-  getFileSize,
   dirName,
   FileStat,
 }
diff --git a/core/src/browser/extension.ts b/core/src/browser/extension.ts
index d934e1c06..b7a9fca4e 100644
--- a/core/src/browser/extension.ts
+++ b/core/src/browser/extension.ts
@@ -113,7 +113,6 @@ export abstract class BaseExtension implements ExtensionType {
     for (const model of models) {
       ModelManager.instance().register(model)
     }
-    events.emit(ModelEvent.OnModelsUpdate, {})
   }

   /**
diff --git a/core/src/browser/extensions/engines/helpers/sse.ts b/core/src/browser/extensions/engines/helpers/sse.ts
index 9713256b3..084267582 100644
--- a/core/src/browser/extensions/engines/helpers/sse.ts
+++ b/core/src/browser/extensions/engines/helpers/sse.ts
@@ -38,14 +38,16 @@ export function requestInference(
           errorCode = ErrorCode.InvalidApiKey
         }
         const error = {
-          message: data.error?.message ?? 'Error occurred.',
+          message: data.error?.message ?? data.message ?? 'Error occurred.',
           code: errorCode,
         }
         subscriber.error(error)
         subscriber.complete()
         return
       }
-      if (model.parameters?.stream === false) {
+      // The stream parameter set in the model can be overridden by the one
+      // in the request body (transformed payload), so check both places
+      if (requestBody?.stream === false || model.parameters?.stream === false) {
         const data = await response.json()
         if (transformResponse) {
           subscriber.next(transformResponse(data))
diff --git a/core/src/browser/extensions/model.ts b/core/src/browser/extensions/model.ts
index e224ec5cc..9a3428988 100644
--- a/core/src/browser/extensions/model.ts
+++ b/core/src/browser/extensions/model.ts
@@ -12,6 +12,7 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
     return ExtensionTypeEnum.Model
   }

+  abstract configurePullOptions(configs: { [key: string]: any }): Promise<any>
   abstract getModels(): Promise<Model[]>
   abstract pullModel(model: string, id?: string, name?: string): Promise<void>
   abstract cancelModelPull(modelId: string): Promise<void>
diff --git a/core/src/node/api/processors/download.test.ts b/core/src/node/api/processors/download.test.ts
index 21d94165d..c4b171a7d 100644
--- a/core/src/node/api/processors/download.test.ts
+++ b/core/src/node/api/processors/download.test.ts
@@ -23,6 +23,11 @@ jest.mock('fs', () => ({
   createWriteStream: jest.fn(),
 }))

+const requestMock = jest.fn((options, callback) => {
+  callback(new Error('Test error'), null)
+})
+jest.mock('request', () => requestMock)
+
 jest.mock('request-progress', () => {
   return jest.fn().mockImplementation(() => {
     return {
@@ -54,18 +59,6 @@ describe('Downloader', () => {
   beforeEach(() => {
     jest.resetAllMocks()
   })
-  it('should handle getFileSize errors correctly', async () => {
-    const observer = jest.fn()
-    const url = 'http://example.com/file'
-
-    const downloader = new Downloader(observer)
-    const requestMock = jest.fn((options, callback) => {
-      callback(new Error('Test error'), null)
-    })
-    jest.mock('request', () => requestMock)
-
-    await expect(downloader.getFileSize(observer, url)).rejects.toThrow('Test error')
-  })

   it('should pause download correctly', () => {
     const observer = jest.fn()
diff --git a/core/src/node/api/processors/download.ts b/core/src/node/api/processors/download.ts
index ebeb7c299..709ad9687 100644
--- a/core/src/node/api/processors/download.ts
+++ b/core/src/node/api/processors/download.ts
@@ -135,25 +135,4 @@ export class Downloader implements Processor {
   pauseDownload(_observer: any, fileName: any) {
     DownloadManager.instance.networkRequests[fileName]?.pause()
   }
-
-  async getFileSize(_observer: any,
url: string): Promise { - return new Promise((resolve, reject) => { - const request = require('request') - request( - { - url, - method: 'HEAD', - }, - function (err: any, response: any) { - if (err) { - console.error('Getting file size failed:', err) - reject(err) - } else { - const size: number = response.headers['content-length'] ?? -1 - resolve(size) - } - } - ) - }) - } } diff --git a/core/src/node/api/restful/common.ts b/core/src/node/api/restful/common.ts index 39f7b8d8b..989104e03 100644 --- a/core/src/node/api/restful/common.ts +++ b/core/src/node/api/restful/common.ts @@ -1,7 +1,6 @@ import { HttpServer } from '../HttpServer' import { chatCompletions, - deleteBuilder, downloadModel, getBuilder, retrieveBuilder, @@ -14,8 +13,6 @@ import { } from './helper/builder' import { JanApiRouteConfiguration } from './helper/configuration' -import { startModel, stopModel } from './helper/startStopModel' -import { ModelSettingParams } from '../../../types' export const commonRouter = async (app: HttpServer) => { const normalizeData = (data: any) => { @@ -28,19 +25,25 @@ export const commonRouter = async (app: HttpServer) => { // Read & Delete :: Threads | Models | Assistants Object.keys(JanApiRouteConfiguration).forEach((key) => { app.get(`/${key}`, async (_req, _res) => { - if (key === 'models') { + if (key.includes('models')) { return models(_req, _res) } return getBuilder(JanApiRouteConfiguration[key]).then(normalizeData) }) - app.get(`/${key}/:id`, async (request: any) => - retrieveBuilder(JanApiRouteConfiguration[key], request.params.id) - ) + app.get(`/${key}/:id`, async (_req: any, _res: any) => { + if (key.includes('models')) { + return models(_req, _res) + } + return retrieveBuilder(JanApiRouteConfiguration[key], _req.params.id) + }) - app.delete(`/${key}/:id`, async (request: any) => - deleteBuilder(JanApiRouteConfiguration[key], request.params.id) - ) + app.delete(`/${key}/:id`, async (_req: any, _res: any) => { + if (key.includes('models')) { + return models(_req, _res) + } + return retrieveBuilder(JanApiRouteConfiguration[key], _req.params.id) + }) }) // Threads @@ -70,16 +73,9 @@ export const commonRouter = async (app: HttpServer) => { }) ) - app.put(`/models/:modelId/start`, async (request: any) => { - let settingParams: ModelSettingParams | undefined = undefined - if (Object.keys(request.body).length !== 0) { - settingParams = JSON.parse(request.body) as ModelSettingParams - } + app.post(`/models/start`, async (request: any, reply: any) => models(request, reply)) - return startModel(request.params.modelId, settingParams) - }) - - app.put(`/models/:modelId/stop`, async (request: any) => stopModel(request.params.modelId)) + app.post(`/models/stop`, async (request: any, reply: any) => models(request, reply)) // Chat Completion app.post(`/chat/completions`, async (request: any, reply: any) => chatCompletions(request, reply)) diff --git a/core/src/node/api/restful/helper/builder.test.ts b/core/src/node/api/restful/helper/builder.test.ts index f21257098..cfaee6007 100644 --- a/core/src/node/api/restful/helper/builder.test.ts +++ b/core/src/node/api/restful/helper/builder.test.ts @@ -1,17 +1,7 @@ -import { - existsSync, - readdirSync, - readFileSync, - writeFileSync, - mkdirSync, - appendFileSync, - rmdirSync, -} from 'fs' -import { join } from 'path' +import { existsSync, readdirSync, readFileSync, writeFileSync, mkdirSync, appendFileSync } from 'fs' import { getBuilder, retrieveBuilder, - deleteBuilder, getMessages, retrieveMessage, createThread, @@ -82,34 +72,6 @@ 
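The router rework shown above replaces the bespoke `PUT /models/:modelId/start` and `PUT /models/:modelId/stop` handlers with `POST /models/start` and `POST /models/stop` passthroughs to the shared `models` proxy, so the model id now travels in the JSON body instead of the URL path. A hypothetical client call, assuming the default local API server address and `/v1` prefix from Jan's docs (not code from this PR):

```typescript
// Hypothetical client for the reworked routes: the model id moves out of
// the URL path and into the request body, and the server proxies the call
// straight through to Cortex.
const startModel = (modelId: string): Promise<Response> =>
  fetch('http://localhost:1337/v1/models/start', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model: modelId }),
  })
```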
describe('builder helper functions', () => { }) }) - describe('deleteBuilder', () => { - it('should return a message if trying to delete Jan assistant', async () => { - const result = await deleteBuilder({ ...mockConfiguration, dirName: 'assistants' }, 'jan') - expect(result).toEqual({ message: 'Cannot delete Jan assistant' }) - }) - - it('should return a message if data is not found', async () => { - ;(existsSync as jest.Mock).mockReturnValue(true) - ;(readdirSync as jest.Mock).mockReturnValue(['file1']) - ;(readFileSync as jest.Mock).mockReturnValue(JSON.stringify({ id: 'model1' })) - - const result = await deleteBuilder(mockConfiguration, 'nonexistentId') - expect(result).toEqual({ message: 'Not found' }) - }) - - it('should delete the directory and return success message', async () => { - ;(existsSync as jest.Mock).mockReturnValue(true) - ;(readdirSync as jest.Mock).mockReturnValue(['file1']) - ;(readFileSync as jest.Mock).mockReturnValue(JSON.stringify({ id: 'model1' })) - - const result = await deleteBuilder(mockConfiguration, 'model1') - expect(rmdirSync).toHaveBeenCalledWith(join('/mock/path', 'mockDir', 'model1'), { - recursive: true, - }) - expect(result).toEqual({ id: 'model1', object: 'mockObject', deleted: true }) - }) - }) - describe('getMessages', () => { it('should return an empty array if message file does not exist', async () => { ;(existsSync as jest.Mock).mockReturnValue(false) diff --git a/core/src/node/api/restful/helper/builder.ts b/core/src/node/api/restful/helper/builder.ts index c3493a8be..e081708cf 100644 --- a/core/src/node/api/restful/helper/builder.ts +++ b/core/src/node/api/restful/helper/builder.ts @@ -73,34 +73,6 @@ export const retrieveBuilder = async (configuration: RouteConfiguration, id: str return filteredData } -export const deleteBuilder = async (configuration: RouteConfiguration, id: string) => { - if (configuration.dirName === 'assistants' && id === 'jan') { - return { - message: 'Cannot delete Jan assistant', - } - } - - const directoryPath = join(getJanDataFolderPath(), configuration.dirName) - try { - const data = await retrieveBuilder(configuration, id) - if (!data) { - return { - message: 'Not found', - } - } - - const objectPath = join(directoryPath, id) - rmdirSync(objectPath, { recursive: true }) - return { - id: id, - object: configuration.delete.object, - deleted: true, - } - } catch (ex) { - console.error(ex) - } -} - export const getMessages = async (threadId: string): Promise => { const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId) const messageFile = 'messages.jsonl' @@ -308,7 +280,7 @@ export const models = async (request: any, reply: any) => { 'Content-Type': 'application/json', } - const response = await fetch(`${CORTEX_API_URL}/models`, { + const response = await fetch(`${CORTEX_API_URL}/models${request.url.split('/models')[1] ?? 
""}`, { method: request.method, headers: headers, body: JSON.stringify(request.body), diff --git a/core/src/node/api/restful/helper/startStopModel.test.ts b/core/src/node/api/restful/helper/startStopModel.test.ts deleted file mode 100644 index 7c1a56cf1..000000000 --- a/core/src/node/api/restful/helper/startStopModel.test.ts +++ /dev/null @@ -1,10 +0,0 @@ -import { startModel } from './startStopModel' - -describe('startModel', () => { - it('test_startModel_error', async () => { - const modelId = 'testModelId' - const settingParams = undefined - - expect(startModel(modelId, settingParams)).resolves.toThrow() - }) -}) diff --git a/core/src/node/api/restful/helper/startStopModel.ts b/core/src/node/api/restful/helper/startStopModel.ts deleted file mode 100644 index 2e9db6d15..000000000 --- a/core/src/node/api/restful/helper/startStopModel.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { ModelSettingParams } from '../../../../types' -import { CORTEX_DEFAULT_PORT, LOCAL_HOST } from './consts' - -/** - * Start a model - * @param modelId - * @param settingParams - * @returns - */ -export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => { - return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/start`, { - method: 'POST', - body: JSON.stringify({ model: modelId, ...settingParams }), - }) -} - -/* - * Stop model. - */ -export const stopModel = async (modelId: string) => { - return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/stop`, { - method: 'POST', - body: JSON.stringify({ model: modelId }), - }) -} diff --git a/core/src/types/api/index.ts b/core/src/types/api/index.ts index 093314a15..dc9afcb00 100644 --- a/core/src/types/api/index.ts +++ b/core/src/types/api/index.ts @@ -27,6 +27,7 @@ export enum NativeRoute { quickAskSizeUpdated = 'quickAskSizeUpdated', ackDeepLink = 'ackDeepLink', + factoryReset = 'factoryReset' } /** @@ -65,7 +66,6 @@ export enum DownloadRoute { pauseDownload = 'pauseDownload', resumeDownload = 'resumeDownload', getDownloadProgress = 'getDownloadProgress', - getFileSize = 'getFileSize', } export enum DownloadEvent { diff --git a/core/src/types/setting/settingComponent.ts b/core/src/types/setting/settingComponent.ts index 2eae4e16f..2474f6bd4 100644 --- a/core/src/types/setting/settingComponent.ts +++ b/core/src/types/setting/settingComponent.ts @@ -12,7 +12,7 @@ export type SettingComponentProps = { export type ConfigType = 'runtime' | 'setting' -export type ControllerType = 'slider' | 'checkbox' | 'input' +export type ControllerType = 'slider' | 'checkbox' | 'input' | 'tag' export type InputType = 'password' | 'text' | 'email' | 'number' | 'tel' | 'url' @@ -22,7 +22,7 @@ export type InputAction = InputActionsTuple[number] export type InputComponentProps = { placeholder: string - value: string + value: string | string[] type?: InputType textAlign?: 'left' | 'right' inputActions?: InputAction[] diff --git a/docs/src/pages/docs/_meta.json b/docs/src/pages/docs/_meta.json index 231f6a763..8ed88963c 100644 --- a/docs/src/pages/docs/_meta.json +++ b/docs/src/pages/docs/_meta.json @@ -13,6 +13,7 @@ }, "desktop": "Desktop", "data-folder": "Jan Data Folder", + "privacy": "Privacy", "user-guides": { "title": "BASIC USAGE", "type": "separator" diff --git a/docs/src/pages/docs/privacy.mdx b/docs/src/pages/docs/privacy.mdx new file mode 100644 index 000000000..d3be5b6de --- /dev/null +++ b/docs/src/pages/docs/privacy.mdx @@ -0,0 +1,63 @@ +--- +title: Jan Privacy +description: Jan is an app that allows you to own your AI. 
We prioritize your control over your data and explain what data we collect and why.
+keywords:
+  [
+    Jan AI,
+    Jan,
+    ChatGPT alternative,
+    local AI,
+    private AI,
+    conversational AI,
+    OpenAI platform alternative,
+    no-subscription fee,
+    large language model,
+    about Jan,
+    desktop application,
+    thinking machine,
+    jan vision,
+  ]
+---
+
+# Privacy
+
+Jan is an app that allows you to own your AI. We prioritize your control over your data and explain what data we collect and why.
+
+- Jan can't see your chats with AI
+- You're free to opt out
+
+## Why and what we track
+
+To build a reliable, user-friendly AI that you own, we need to understand how Jan is used. We collect two types of data: performance data and usage data.
+
+### Performance data
+When Jan crashes, we collect technical details about what went wrong, along with basic information about the hardware you’re using:
+
+- Specific AI model in use during the crash
+- Hardware: `CPU`, `GPU`, `RAM`
+- Logs: `Date/Time`, `OS & version`, `app version`, `error codes & messages`
+
+### Usage data
+
+We track data like how often the app is opened to check:
+
+- **Active Users**: How many people use Jan daily, to measure engagement
+- **Retention Rates**: To understand whether users are finding value in Jan over time
+
+Usage data is tied to a randomly generated telemetry ID. None of our usage data can be linked to your personal identity.
+
+## What we **don’t** track
+- Your conversations with Jan. Those stay on your device.
+- Your files. We don’t scan, upload, or even look at them.
+- Anything tied to your identity.
+
+## Using Cloud Models
+
+Jan allows you to connect cloud model APIs. If you choose to use cloud-based models (e.g., GPT or Claude models), the API provider handling the model will have access to your messages as part of processing the request. Again, Jan doesn't see or store these messages - they go directly to the provider. Remember: with local models, everything stays on your device, so no one - not even us - can see your messages.
+
+## Where we store & process data
+We use [PostHog](https://posthog.com/eu) EU for analytics, ensuring all data is processed within the European Union. This setup complies with GDPR and other strict privacy regulations. PostHog lets us self-host and securely manage the data we collect. Read more [on PostHog's GDPR doc](https://posthog.com/docs/privacy/gdpr-compliance).
+
+For a detailed breakdown of the analytics data we collect, you can check out our analytics repo. If you have any questions or concerns, feel free to reach out to us at hi@jan.ai.
\ No newline at end of file diff --git a/docs/src/pages/integrations/coding/_assets/tabby-answer-engine.png b/docs/src/pages/integrations/coding/_assets/tabby-answer-engine.png new file mode 100644 index 000000000..420b609fc Binary files /dev/null and b/docs/src/pages/integrations/coding/_assets/tabby-answer-engine.png differ diff --git a/docs/src/pages/integrations/coding/_assets/tabby-chat-sidebar.png b/docs/src/pages/integrations/coding/_assets/tabby-chat-sidebar.png new file mode 100644 index 000000000..50cfbd226 Binary files /dev/null and b/docs/src/pages/integrations/coding/_assets/tabby-chat-sidebar.png differ diff --git a/docs/src/pages/integrations/coding/tabby.mdx b/docs/src/pages/integrations/coding/tabby.mdx new file mode 100644 index 000000000..6647b4eb4 --- /dev/null +++ b/docs/src/pages/integrations/coding/tabby.mdx @@ -0,0 +1,104 @@ +--- +title: Tabby +description: A step-by-step guide on integrating Jan with Tabby and VSCode, JetBrains, or other IDEs. +keywords: + [ + Jan, + Customizable Intelligence, LLM, + local AI, + privacy focus, + free and open source, + private and offline, + conversational AI, + no-subscription fee, + large language models, + Tabby integration, + VSCode integration, + JetBrains integration, + ] +--- + +import { Tabs, Steps } from 'nextra/components' + +# Tabby + +## Integrate Jan with Tabby and Your Favorite IDEs + +[Tabby](https://www.tabbyml.com/) is an open-source, self-hosted AI coding assistant. +With Tabby, teams can easily set up their own LLM-powered code completion server. + +Tabby provides integrations with VSCode, JetBrains, and other IDEs to help developers code more efficiently, +and it can be used with various LLM services, including Jan. + +To integrate Jan with Tabby, follow these steps: + + + +### Step 1: Enable the Jan API Server + +To set up Tabby with Jan's Local Server, you must activate the Jan API Server with your chosen model. + +1. Click the `Local API Server` (`<>`) button above the Settings. Jan will direct you to the **Local API Server** section. +2. Configure the server, including the **IP Port**, **Cross-Origin Resource Sharing (CORS)**, and **Verbose Server Logs**. +3. Press the **Start Server** button. + +### Step 2: Find the Model ID and Ensure the Model is Activated + +1. Go to `Settings` > `My Models`. +2. Models are listed with their **Model ID** beneath their names. +3. Click the **three dots (⋮)** button next to the model. +4. Select **Start Model** to activate the model. + +### Step 3: Installing Tabby Server + +Use the following documentation to install the Tabby server: +- [Docker](https://tabby.tabbyml.com/docs/quick-start/installation/docker/) +- [Apple Silicon](https://tabby.tabbyml.com/docs/quick-start/installation/apple/) +- [Linux](https://tabby.tabbyml.com/docs/quick-start/installation/linux/) +- [Windows](https://tabby.tabbyml.com/docs/quick-start/installation/windows/) + +Then, follow the steps to connect Jan with the Tabby server: +[Connect Jan with Tabby](https://tabby.tabbyml.com/docs/references/models-http-api/jan.ai/). + +For example, to connect Jan with Tabby, save the following configuration under `~/.tabby/config.toml`: + +```toml title="~/.tabby/config.toml" +# Chat model +[model.chat.http] +kind = "openai/chat" +model_name = "model_id" +api_endpoint = "http://localhost:1337/v1" +api_key = "" +``` + +Currently, the Jan completion and embedding API is under construction. +Once completed, you can also connect Jan with Tabby for completion and embedding tasks. 
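Before pointing Tabby at Jan, it can help to confirm that the local API server is actually reachable. A quick check, not from the Tabby docs, assuming the default `http://localhost:1337/v1` endpoint used in the config above:

```typescript
// List the models Jan exposes on its OpenAI-compatible endpoint; if this
// request fails, Tabby will not be able to reach Jan either.
const listJanModels = async (): Promise<void> => {
  const response = await fetch('http://localhost:1337/v1/models')
  if (!response.ok) throw new Error(`Jan API server returned ${response.status}`)
  console.log(await response.json())
}

listJanModels()
```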
+ +### Step 4: Installing Tabby on Your Favorite IDEs + +Refer to the following documentation to install the Tabby extension on your favorite IDEs: +- [Visual Studio Code](https://tabby.tabbyml.com/docs/extensions/installation/vscode/) +- [JetBrains IntelliJ Platform](https://tabby.tabbyml.com/docs/extensions/installation/intellij/) +- [VIM / NeoVIM](https://tabby.tabbyml.com/docs/extensions/installation/vim/) + + + +## How to Use Tabby with Jan Integration + +### Answer Engine: Chat with Your Codes and Documentation + +Tabby offers an [Answer Engine](https://tabby.tabbyml.com/docs/administration/answer-engine/) on the homepage, +which can leverage the Jan LLM and related contexts like code, documentation, and web pages to answer user questions. + +Simply open the Tabby homepage at [localhost:8080](http://localhost:8080) and ask your questions. + +![Answer Engine](./_assets/tabby-answer-engine.png) + +### IDE Chat Sidebar + +After installing the Tabby extension on your preferred IDEs, you can engage in a conversation with Jan to: + +1. Discuss your code, receive suggestions, and seek assistance. +2. Request Jan to inline edit your code, and then review and accept the proposed changes. + +![Chat Sidebar](./_assets/tabby-chat-sidebar.png) \ No newline at end of file diff --git a/electron/handlers/native.ts b/electron/handlers/native.ts index 869b9fd58..813231bd4 100644 --- a/electron/handlers/native.ts +++ b/electron/handlers/native.ts @@ -12,6 +12,9 @@ import { } from '@janhq/core/node' import { SelectFileOption } from '@janhq/core' import { menu } from '../utils/menu' +import { migrate } from '../utils/migration' +import { createUserSpace } from '../utils/path' +import { setupExtensions } from '../utils/extension' const isMac = process.platform === 'darwin' @@ -33,14 +36,28 @@ export function handleAppIPCs() { nativeTheme.themeSource = 'light' }) + /** + * Handles the "setCloseApp" IPC message by closing the main application window. + * This effectively closes the application if no other windows are open. + */ ipcMain.handle(NativeRoute.setCloseApp, () => { windowManager.mainWindow?.close() }) + /** + * Handles the "setMinimizeApp" IPC message by minimizing the main application window. + * The window will be minimized to the system's taskbar or dock. + */ ipcMain.handle(NativeRoute.setMinimizeApp, () => { windowManager.mainWindow?.minimize() }) + /** + * Handles the "setMaximizeApp" IPC message. It toggles the maximization state of the main window. + * If the window is currently maximized, it will be un-maximized (restored to its previous size). + * If the window is not maximized, it will be maximized to fill the screen. + * @param _event - The IPC event object. + */ ipcMain.handle(NativeRoute.setMaximizeApp, async (_event) => { if (windowManager.mainWindow?.isMaximized()) { windowManager.mainWindow.unmaximize() @@ -104,6 +121,11 @@ export function handleAppIPCs() { } }) + /** + * Handles the "selectDirectory" IPC message to open a dialog for selecting a directory. + * If no main window is found, logs an error and exits. + * @returns {string} The path of the selected directory, or nothing if canceled. + */ ipcMain.handle(NativeRoute.selectDirectory, async () => { const mainWindow = windowManager.mainWindow if (!mainWindow) { @@ -122,6 +144,14 @@ export function handleAppIPCs() { } }) + /** + * Handles the "selectFiles" IPC message to open a dialog for selecting files. + * Allows options for setting the dialog title, button label, and selection properties. 
+ * Logs an error if no main window is found. + * @param _event - The IPC event object. + * @param option - Options for customizing file selection dialog. + * @returns {string[]} An array of selected file paths, or nothing if canceled. + */ ipcMain.handle( NativeRoute.selectFiles, async (_event, option?: SelectFileOption) => { @@ -156,11 +186,20 @@ export function handleAppIPCs() { } ) + /** + * Handles the "hideQuickAskWindow" IPC message to hide the quick ask window. + * @returns A promise that resolves when the window is hidden. + */ ipcMain.handle( NativeRoute.hideQuickAskWindow, async (): Promise => windowManager.hideQuickAskWindow() ) + /** + * Handles the "sendQuickAskInput" IPC message to send user input to the main window. + * @param _event - The IPC event object. + * @param input - User input string to be sent. + */ ipcMain.handle( NativeRoute.sendQuickAskInput, async (_event, input: string): Promise => { @@ -171,6 +210,12 @@ export function handleAppIPCs() { } ) + /** + * Handles the "showOpenMenu" IPC message to show the context menu at given coordinates. + * Only applicable on non-Mac platforms. + * @param e - The event object. + * @param args - Contains coordinates where the menu should appear. + */ ipcMain.handle(NativeRoute.showOpenMenu, function (e, args) { if (!isMac && windowManager.mainWindow) { menu.popup({ @@ -181,23 +226,55 @@ export function handleAppIPCs() { } }) + /** + * Handles the "hideMainWindow" IPC message to hide the main application window. + * @returns A promise that resolves when the window is hidden. + */ ipcMain.handle( NativeRoute.hideMainWindow, async (): Promise => windowManager.hideMainWindow() ) + /** + * Handles the "showMainWindow" IPC message to show the main application window. + * @returns A promise that resolves when the window is shown. + */ ipcMain.handle( NativeRoute.showMainWindow, async (): Promise => windowManager.showMainWindow() ) + /** + * Handles the "quickAskSizeUpdated" IPC message to update the size of the quick ask window. + * Resizes window by the given height offset. + * @param _event - The IPC event object. + * @param heightOffset - The amount of height to increase. + * @returns A promise that resolves when the window is resized. + */ ipcMain.handle( NativeRoute.quickAskSizeUpdated, async (_event, heightOffset: number): Promise => windowManager.expandQuickAskWindow(heightOffset) ) + /** + * Handles the "ackDeepLink" IPC message to acknowledge a deep link. + * Triggers handling of deep link in the application. + * @param _event - The IPC event object. + * @returns A promise that resolves when the deep link is acknowledged. + */ ipcMain.handle(NativeRoute.ackDeepLink, async (_event): Promise => { windowManager.ackDeepLink() }) + + /** + * Handles the "factoryReset" IPC message to reset the application to its initial state. + * Clears loaded modules, recreates user space, runs migrations, and sets up extensions. + * @param _event - The IPC event object. + * @returns A promise that resolves after the reset operations are complete. 
+ */ + ipcMain.handle(NativeRoute.factoryReset, async (_event): Promise => { + ModuleManager.instance.clearImportedModules() + return createUserSpace().then(migrate).then(setupExtensions) + }) } diff --git a/electron/managers/window.ts b/electron/managers/window.ts index c9c43ea77..918036365 100644 --- a/electron/managers/window.ts +++ b/electron/managers/window.ts @@ -28,6 +28,7 @@ class WindowManager { ...mainWindowConfig, width: bounds.width, height: bounds.height, + show: false, x: bounds.x, y: bounds.y, webPreferences: { @@ -78,6 +79,10 @@ class WindowManager { windowManager.hideMainWindow() } }) + + windowManager.mainWindow?.on('ready-to-show', function () { + windowManager.mainWindow?.show() + }) } createQuickAskWindow(preloadPath: string, startUrl: string): void { diff --git a/electron/tests/e2e/thread.e2e.spec.ts b/electron/tests/e2e/thread.e2e.spec.ts index 5d7328053..dfd131988 100644 --- a/electron/tests/e2e/thread.e2e.spec.ts +++ b/electron/tests/e2e/thread.e2e.spec.ts @@ -25,7 +25,7 @@ test('Select GPT model from Hub and Chat with Invalid API Key', async ({ { timeout: TIMEOUT } ) - const APIKeyError = page.getByTestId('invalid-API-key-error') + const APIKeyError = page.getByTestId('passthrough-error-message') await expect(APIKeyError).toBeVisible({ timeout: TIMEOUT, }) diff --git a/electron/utils/migration.ts b/electron/utils/migration.ts index 80851f9de..505de0f7b 100644 --- a/electron/utils/migration.ts +++ b/electron/utils/migration.ts @@ -3,7 +3,6 @@ import { app } from 'electron' import { join } from 'path' import { rmdirSync, - readFileSync, existsSync, mkdirSync, readdirSync, diff --git a/extensions/assistant-extension/src/node/engine.ts b/extensions/assistant-extension/src/node/engine.ts deleted file mode 100644 index 05a380340..000000000 --- a/extensions/assistant-extension/src/node/engine.ts +++ /dev/null @@ -1,38 +0,0 @@ -import fs from 'fs' -import path from 'path' -import { SettingComponentProps, getJanDataFolderPath } from '@janhq/core/node' - -// Sec: Do not send engine settings over requests -// Read it manually instead -export const readEmbeddingEngine = (engineName: string) => { - if (engineName !== 'openai' && engineName !== 'groq') { - const engineSettings = fs.readFileSync( - path.join(getJanDataFolderPath(), 'engines', `${engineName}.json`), - 'utf-8' - ) - return JSON.parse(engineSettings) - } else { - const settingDirectoryPath = path.join( - getJanDataFolderPath(), - 'settings', - '@janhq', - // TODO: James - To be removed - engineName === 'openai' - ? 'inference-openai-extension' - : 'inference-groq-extension', - 'settings.json' - ) - - const content = fs.readFileSync(settingDirectoryPath, 'utf-8') - const settings: SettingComponentProps[] = JSON.parse(content) - const apiKeyId = engineName === 'openai' ? 
'openai-api-key' : 'groq-api-key' - const keySetting = settings.find((setting) => setting.key === apiKeyId) - - let apiKey = keySetting?.controllerProps.value - if (typeof apiKey !== 'string') apiKey = '' - - return { - api_key: apiKey, - } - } -} diff --git a/extensions/assistant-extension/src/node/retrieval.ts b/extensions/assistant-extension/src/node/retrieval.ts index 3386029fa..5804ff763 100644 --- a/extensions/assistant-extension/src/node/retrieval.ts +++ b/extensions/assistant-extension/src/node/retrieval.ts @@ -8,7 +8,6 @@ import { MemoryVectorStore } from 'langchain/vectorstores/memory' import { HNSWLib } from 'langchain/vectorstores/hnswlib' import { OpenAIEmbeddings } from 'langchain/embeddings/openai' -import { readEmbeddingEngine } from './engine' export class Retrieval { public chunkSize: number = 100 @@ -28,8 +27,8 @@ export class Retrieval { // declare time-weighted retriever and storage this.timeWeightedVectorStore = new MemoryVectorStore( new OpenAIEmbeddings( - { openAIApiKey: 'nitro-embedding' }, - { basePath: 'http://127.0.0.1:3928/v1' } + { openAIApiKey: 'cortex-embedding' }, + { basePath: 'http://127.0.0.1:39291/v1' } ) ) this.timeWeightedretriever = new TimeWeightedVectorStoreRetriever({ @@ -49,21 +48,11 @@ export class Retrieval { } public updateEmbeddingEngine(model: string, engine: string): void { - // Engine settings are not compatible with the current embedding model params - // Switch case manually for now - if (engine === 'nitro') { - this.embeddingModel = new OpenAIEmbeddings( - { openAIApiKey: 'nitro-embedding', model }, - // TODO: Raw settings - { basePath: 'http://127.0.0.1:3928/v1' }, - ) - } else { - // Fallback to OpenAI Settings - const settings = readEmbeddingEngine(engine) - this.embeddingModel = new OpenAIEmbeddings({ - openAIApiKey: settings.api_key, - }) - } + this.embeddingModel = new OpenAIEmbeddings( + { openAIApiKey: 'cortex-embedding', model }, + // TODO: Raw settings + { basePath: 'http://127.0.0.1:39291/v1' } + ) // update time-weighted embedding model this.timeWeightedVectorStore.embeddings = this.embeddingModel diff --git a/extensions/inference-anthropic-extension/package.json b/extensions/inference-anthropic-extension/package.json index 19c0df5e8..8115ba2df 100644 --- a/extensions/inference-anthropic-extension/package.json +++ b/extensions/inference-anthropic-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-anthropic-extension", "productName": "Anthropic Inference Engine", - "version": "1.0.2", + "version": "1.0.3", "description": "This extension enables Anthropic chat completion API calls", "main": "dist/index.js", "module": "dist/module.js", diff --git a/extensions/inference-anthropic-extension/resources/models.json b/extensions/inference-anthropic-extension/resources/models.json index 8b4ef03fb..59e41245b 100644 --- a/extensions/inference-anthropic-extension/resources/models.json +++ b/extensions/inference-anthropic-extension/resources/models.json @@ -1,28 +1,4 @@ [ - { - "sources": [ - { - "url": "https://www.anthropic.com/" - } - ], - "id": "claude-3-opus-20240229", - "object": "model", - "name": "Claude 3 Opus 20240229", - "version": "1.1", - "description": "Claude 3 Opus is a powerful model suitables for highly complex task.", - "format": "api", - "settings": {}, - "parameters": { - "max_tokens": 4096, - "temperature": 0.7, - "stream": false - }, - "metadata": { - "author": "Anthropic", - "tags": ["General", "Big Context Length"] - }, - "engine": "anthropic" - }, { "sources": [ { @@ -47,78 +23,6 @@ }, "engine": 
"anthropic" }, - { - "sources": [ - { - "url": "https://www.anthropic.com/" - } - ], - "id": "claude-3-sonnet-20240229", - "object": "model", - "name": "Claude 3 Sonnet 20240229", - "version": "1.1", - "description": "Claude 3 Sonnet is an ideal model balance of intelligence and speed for enterprise workloads.", - "format": "api", - "settings": {}, - "parameters": { - "max_tokens": 4096, - "temperature": 0.7, - "stream": false - }, - "metadata": { - "author": "Anthropic", - "tags": ["General", "Big Context Length"] - }, - "engine": "anthropic" - }, - { - "sources": [ - { - "url": "https://www.anthropic.com/" - } - ], - "id": "claude-3-haiku-20240307", - "object": "model", - "name": "Claude 3 Haiku 20240307", - "version": "1.1", - "description": "Claude 3 Haiku is the fastest model provides near-instant responsiveness.", - "format": "api", - "settings": {}, - "parameters": { - "max_tokens": 4096, - "temperature": 0.7, - "stream": false - }, - "metadata": { - "author": "Anthropic", - "tags": ["General", "Big Context Length"] - }, - "engine": "anthropic" - }, - { - "sources": [ - { - "url": "https://www.anthropic.com/" - } - ], - "id": "claude-3-5-haiku-20241022", - "object": "model", - "name": "Claude 3.5 Haiku 20241022", - "version": "1.0", - "description": "Claude 3.5 Haiku is the fastest model provides near-instant responsiveness.", - "format": "api", - "settings": {}, - "parameters": { - "max_tokens": 8192, - "temperature": 0.7, - "stream": false - }, - "metadata": { - "author": "Anthropic", - "tags": ["General", "Big Context Length"] - }, - "engine": "anthropic" - }, { "sources": [ { @@ -143,54 +47,6 @@ }, "engine": "anthropic" }, - { - "sources": [ - { - "url": "https://www.anthropic.com/" - } - ], - "id": "claude-3-5-sonnet-20240620", - "object": "model", - "name": "Claude 3.5 Sonnet 20240620", - "version": "1.1", - "description": "Claude 3.5 Sonnet raises the industry bar for intelligence, outperforming competitor models and Claude 3 Opus on a wide range of evaluations, with the speed and cost of our mid-tier model, Claude 3 Sonnet.", - "format": "api", - "settings": {}, - "parameters": { - "max_tokens": 8192, - "temperature": 0.7, - "stream": true - }, - "metadata": { - "author": "Anthropic", - "tags": ["General", "Big Context Length"] - }, - "engine": "anthropic" - }, - { - "sources": [ - { - "url": "https://www.anthropic.com/" - } - ], - "id": "claude-3-5-sonnet-20241022", - "object": "model", - "name": "Claude 3.5 Sonnet 20241022", - "version": "1.0", - "description": "Claude 3.5 Sonnet raises the industry bar for intelligence, outperforming competitor models and Claude 3 Opus on a wide range of evaluations, with the speed and cost of our mid-tier model, Claude 3 Sonnet.", - "format": "api", - "settings": {}, - "parameters": { - "max_tokens": 8192, - "temperature": 0.7, - "stream": true - }, - "metadata": { - "author": "Anthropic", - "tags": ["General", "Big Context Length"] - }, - "engine": "anthropic" - }, { "sources": [ { diff --git a/extensions/inference-cohere-extension/src/index.ts b/extensions/inference-cohere-extension/src/index.ts index dd7f03317..2615ea893 100644 --- a/extensions/inference-cohere-extension/src/index.ts +++ b/extensions/inference-cohere-extension/src/index.ts @@ -113,6 +113,8 @@ export default class JanInferenceCohereExtension extends RemoteOAIEngine { } transformResponse = (data: any) => { - return typeof data === 'object' ? data.text : JSON.parse(data).text ?? '' + return typeof data === 'object' + ? 
data.text + : (JSON.parse(data.replace('data: ', '').trim()).text ?? '') } } diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt index e6d5cb833..40ac6bb0e 100644 --- a/extensions/inference-cortex-extension/bin/version.txt +++ b/extensions/inference-cortex-extension/bin/version.txt @@ -1 +1 @@ -1.0.2 \ No newline at end of file +1.0.4-rc4 \ No newline at end of file diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat index ecff683c3..7d9a9213a 100644 --- a/extensions/inference-cortex-extension/download.bat +++ b/extensions/inference-cortex-extension/download.bat @@ -2,23 +2,23 @@ set BIN_PATH=./bin set SHARED_PATH=./../../electron/shared set /p CORTEX_VERSION=<./bin/version.txt +set ENGINE_VERSION=0.1.40 @REM Download cortex.llamacpp binaries -set VERSION=v0.1.35 -set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.35-windows-amd64 -set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION% -set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan +set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/v%ENGINE_VERSION%/cortex.llamacpp-%ENGINE_VERSION%-windows-amd64 +set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/v%ENGINE_VERSION% +set SUBFOLDERS=windows-amd64-noavx-cuda-12-0 windows-amd64-noavx-cuda-11-7 windows-amd64-avx2-cuda-12-0 windows-amd64-avx2-cuda-11-7 windows-amd64-noavx windows-amd64-avx windows-amd64-avx2 windows-amd64-avx512 windows-amd64-vulkan -call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-11-7/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/noavx/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/avx/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp +call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex.cpp/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-12-0/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o 
%SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-11-7/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx-cuda-12-0/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx-cuda-11-7/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx512/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-vulkan/v%ENGINE_VERSION% call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH% call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH% @@ -28,10 +28,10 @@ del %BIN_PATH%\cortex.exe @REM Loop through each folder and move DLLs (excluding engine.dll) for %%F in (%SUBFOLDERS%) do ( - echo Processing folder: %BIN_PATH%\%%F + echo Processing folder: %SHARED_PATH%\engines\cortex.llamacpp\%%F\v%ENGINE_VERSION% @REM Move all .dll files except engine.dll - for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do ( + for %%D in (%SHARED_PATH%\engines\cortex.llamacpp\%%F\v%ENGINE_VERSION%\*.dll) do ( if /I not "%%~nxD"=="engine.dll" ( move "%%D" "%BIN_PATH%" ) diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh index 902a31e51..f62e5961b 100755 --- a/extensions/inference-cortex-extension/download.sh +++ b/extensions/inference-cortex-extension/download.sh @@ -2,9 +2,11 @@ # Read CORTEX_VERSION CORTEX_VERSION=$(cat ./bin/version.txt) -CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download" -ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35/cortex.llamacpp-0.1.35" -CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35" +ENGINE_VERSION=0.1.40 +CORTEX_RELEASE_URL="https://github.com/janhq/cortex.cpp/releases/download" +ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}/cortex.llamacpp-${ENGINE_VERSION}" +CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}" +SHARED_PATH="../../electron/shared" # Detect platform OS_TYPE=$(uname) @@ -17,17 +19,19 @@ if [ "$OS_TYPE" == "Linux" ]; then chmod +x "./bin/cortex-server" # Download engines for Linux - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/noavx/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/avx/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/avx2/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o 
"./bin/avx512/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-12-0/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-11-7/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1 - download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1 - download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx512/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2-cuda-12-0/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2-cuda-11-7/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx-cuda-12-0/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx-cuda-11-7/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-vulkan/v${ENGINE_VERSION}" 1 + download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1 + download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1 + mkdir -p "${SHARED_PATH}/engines/cortex.llamacpp/deps" + touch "${SHARED_PATH}/engines/cortex.llamacpp/deps/keep" elif [ "$OS_TYPE" == "Darwin" ]; then # macOS downloads @@ -38,8 +42,8 @@ elif [ "$OS_TYPE" == "Darwin" ]; then chmod +x "./bin/cortex-server" # Download engines for macOS - download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp - download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp + download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/mac-arm64/v${ENGINE_VERSION}" + download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/mac-amd64/v${ENGINE_VERSION}" else echo "Unsupported operating system: $OS_TYPE" diff --git a/extensions/inference-cortex-extension/package.json b/extensions/inference-cortex-extension/package.json index 
d262ad5ec..c6d3f70b6 100644 --- a/extensions/inference-cortex-extension/package.json +++ b/extensions/inference-cortex-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-cortex-extension", "productName": "Cortex Inference Engine", - "version": "1.0.21", + "version": "1.0.22", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", diff --git a/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json b/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json index a2197dab2..90aa50117 100644 --- a/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json +++ b/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json @@ -1,8 +1,8 @@ { "sources": [ { - "url": "https://huggingface.co/cortexso/phi3/resolve/main/model.gguf", - "filename": "model.gguf" + "url": "https://huggingface.co/bartowski/Phi-3-mini-4k-instruct-GGUF/resolve/main/Phi-3-mini-4k-instruct-Q4_K_M.gguf", + "filename": "Phi-3-mini-4k-instruct-Q4_K_M.gguf" } ], "id": "phi3-3.8b", @@ -14,7 +14,7 @@ "settings": { "ctx_len": 4096, "prompt_template": "<|user|>\n{prompt}<|end|>\n<|assistant|>\n", - "llama_model_path": "model.gguf", + "llama_model_path": "Phi-3-mini-4k-instruct-Q4_K_M.gguf", "ngl": 33 }, "parameters": { diff --git a/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json b/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json index f7131ee98..afce04952 100644 --- a/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json +++ b/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json @@ -1,8 +1,8 @@ { "sources": [ { - "url": "https://huggingface.co/bartowski/Phi-3-medium-128k-instruct-GGUF/resolve/main/Phi-3-medium-128k-instruct-Q4_K_M.gguf", - "filename": "Phi-3-medium-128k-instruct-Q4_K_M.gguf" + "url": "https://huggingface.co/bartowski/Phi-3-mini-4k-instruct-GGUF/resolve/main/Phi-3-mini-4k-instruct-Q4_K_M.gguf", + "filename": "Phi-3-mini-4k-instruct-Q4_K_M.gguf" } ], "id": "phi3-medium", @@ -14,7 +14,7 @@ "settings": { "ctx_len": 128000, "prompt_template": "<|user|> {prompt}<|end|><|assistant|>", - "llama_model_path": "Phi-3-medium-128k-instruct-Q4_K_M.gguf", + "llama_model_path": "Phi-3-mini-4k-instruct-Q4_K_M.gguf", "ngl": 33 }, "parameters": { diff --git a/extensions/inference-cortex-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts index 34ad9295d..284386869 100644 --- a/extensions/inference-cortex-extension/rollup.config.ts +++ b/extensions/inference-cortex-extension/rollup.config.ts @@ -120,6 +120,7 @@ export default [ DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson), CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'), CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), + CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.40'), }), // Allow json resolution json(), diff --git a/extensions/inference-cortex-extension/src/@types/global.d.ts b/extensions/inference-cortex-extension/src/@types/global.d.ts index 48dbcd780..381a80f5e 100644 --- a/extensions/inference-cortex-extension/src/@types/global.d.ts +++ b/extensions/inference-cortex-extension/src/@types/global.d.ts @@ -1,6 +1,7 @@ declare const NODE: string declare const CORTEX_API_URL: string declare const CORTEX_SOCKET_URL: string +declare 
const CORTEX_ENGINE_VERSION: string declare const DEFAULT_SETTINGS: Array declare const MODELS: Array diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index 44ec423da..15f7a0294 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -17,7 +17,10 @@ import { extractModelLoadParams, fs, events, - ModelEvent + ModelEvent, + SystemInformation, + dirName, + AppConfigurationEventName, } from '@janhq/core' import PQueue from 'p-queue' import ky from 'ky' @@ -45,6 +48,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { provider: string = InferenceEngine.cortex + shouldReconnect = true + /** * The URL for making inference requests. */ @@ -65,23 +70,34 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { super.onLoad() + this.queue.add(() => this.clean()) + // Run the process watchdog const systemInfo = await systemInformation() - await this.clean() - await executeOnMain(NODE, 'run', systemInfo) - + this.queue.add(() => executeOnMain(NODE, 'run', systemInfo)) this.queue.add(() => this.healthz()) - + this.queue.add(() => this.setDefaultEngine(systemInfo)) this.subscribeToEvents() window.addEventListener('beforeunload', () => { this.clean() }) + + const currentMode = systemInfo.gpuSetting?.run_mode + + events.on(AppConfigurationEventName.OnConfigurationUpdate, async () => { + const systemInfo = await systemInformation() + // Update run mode on settings update + if (systemInfo.gpuSetting?.run_mode !== currentMode) + this.queue.add(() => this.setDefaultEngine(systemInfo)) + }) } - onUnload(): void { + async onUnload() { + console.log('Clean up cortex.cpp services') + this.shouldReconnect = false this.clean() - executeOnMain(NODE, 'dispose') + await executeOnMain(NODE, 'dispose') super.onUnload() } @@ -89,7 +105,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { model: Model & { file_path?: string } ): Promise { if ( - model.engine === InferenceEngine.nitro && + (model.engine === InferenceEngine.nitro || model.settings.vision_model) && model.settings.llama_model_path ) { // Legacy chat model support @@ -105,7 +121,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { model.settings = settings } - if (model.engine === InferenceEngine.nitro && model.settings.mmproj) { + if ( + (model.engine === InferenceEngine.nitro || model.settings.vision_model) && + model.settings.mmproj + ) { // Legacy clip vision model support model.settings = { ...model.settings, @@ -127,6 +146,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { ? 
InferenceEngine.cortex_llamacpp : model.engine, }, + timeout: false, }) .json() .catch(async (e) => { @@ -149,25 +169,54 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { * Do health check on cortex.cpp * @returns */ - healthz(): Promise { + private healthz(): Promise { return ky .get(`${CORTEX_API_URL}/healthz`, { retry: { - limit: 10, + limit: 20, + delay: () => 500, methods: ['get'], }, }) - .then(() => { }) + .then(() => {}) + } + + /** + * Set default engine variant on launch + */ + private async setDefaultEngine(systemInfo: SystemInformation) { + const variant = await executeOnMain( + NODE, + 'engineVariant', + systemInfo.gpuSetting + ) + return ( + ky + // Fallback support for legacy API + .post( + `${CORTEX_API_URL}/v1/engines/${InferenceEngine.cortex_llamacpp}/default?version=${CORTEX_ENGINE_VERSION}&variant=${variant}`, + { + json: { + version: CORTEX_ENGINE_VERSION, + variant, + }, + } + ) + .then(() => {}) + ) } /** * Clean cortex processes * @returns */ - clean(): Promise { + private clean(): Promise { return ky .delete(`${CORTEX_API_URL}/processmanager/destroy`, { timeout: 2000, // maximum 2 seconds + retry: { + limit: 0, + }, }) .catch(() => { // Do nothing @@ -177,7 +226,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { /** * Subscribe to cortex.cpp websocket events */ - subscribeToEvents() { + private subscribeToEvents() { this.queue.add( () => new Promise((resolve) => { @@ -195,33 +244,47 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { ) const percent = total > 0 ? transferred / total : 0 - events.emit(DownloadTypes[data.type as keyof typeof DownloadTypes], { - modelId: data.task.id, - percent: percent, - size: { - transferred: transferred, - total: total, - }, - }) + events.emit( + DownloadTypes[data.type as keyof typeof DownloadTypes], + { + modelId: data.task.id, + percent: percent, + size: { + transferred: transferred, + total: total, + }, + } + ) // Update models list from Hub if (data.type === DownloadTypes.DownloadSuccess) { // Delay for the state update from cortex.cpp // Just to be sure setTimeout(() => { - events.emit(ModelEvent.OnModelsUpdate, {}) + events.emit(ModelEvent.OnModelsUpdate, { + fetch: true, + }) }, 500) } }) + + this.socket.onclose = (event) => { + console.log('WebSocket closed:', event) + events.emit(ModelEvent.OnModelStopped, {}) + if (this.shouldReconnect) { + console.log(`Attempting to reconnect...`) + setTimeout(() => this.subscribeToEvents(), 1000) + } + } + resolve() }) ) } - } /// Legacy -export const getModelFilePath = async ( - model: Model, +const getModelFilePath = async ( + model: Model & { file_path?: string }, file: string ): Promise => { // Symlink to the model file @@ -231,6 +294,9 @@ export const getModelFilePath = async ( ) { return model.sources[0]?.url } + if (model.file_path) { + await joinPath([await dirName(model.file_path), file]) + } return joinPath([await getJanDataFolderPath(), 'models', model.id, file]) } /// diff --git a/extensions/inference-cortex-extension/src/node/cpuInfo.ts b/extensions/inference-cortex-extension/src/node/cpuInfo.ts new file mode 100644 index 000000000..4366a995b --- /dev/null +++ b/extensions/inference-cortex-extension/src/node/cpuInfo.ts @@ -0,0 +1,27 @@ +import { cpuInfo } from 'cpu-instructions' + +// Check the CPU info and determine the supported instruction set +const info = cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512') + ? 
'avx512' + : cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX2') + ? 'avx2' + : cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX') + ? 'avx' + : 'noavx' + +// Send the result and wait for confirmation before exiting +new Promise((resolve, reject) => { + // @ts-ignore + process.send(info, (error: Error | null) => { + if (error) { + reject(error) + } else { + resolve() + } + }) +}) + .then(() => process.exit(0)) + .catch((error) => { + console.error('Failed to send info:', error) + process.exit(1) + }) diff --git a/extensions/inference-cortex-extension/src/node/execute.test.ts b/extensions/inference-cortex-extension/src/node/execute.test.ts index 622eb38af..1bcefce9d 100644 --- a/extensions/inference-cortex-extension/src/node/execute.test.ts +++ b/extensions/inference-cortex-extension/src/node/execute.test.ts @@ -1,7 +1,8 @@ import { describe, expect, it } from '@jest/globals' -import { executableCortexFile } from './execute' -import { GpuSetting } from '@janhq/core' +import { engineVariant, executableCortexFile } from './execute' +import { GpuSetting } from '@janhq/core/node' import { cpuInfo } from 'cpu-instructions' +import { fork } from 'child_process' let testSettings: GpuSetting = { run_mode: 'cpu', @@ -30,6 +31,15 @@ jest.mock('cpu-instructions', () => ({ let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock mockCpuInfo.mockReturnValue([]) +jest.mock('@janhq/core/node', () => ({ + appResourcePath: () => '.', + log: jest.fn(), +})) +jest.mock('child_process', () => ({ + fork: jest.fn(), +})) +const mockFork = fork as jest.Mock + describe('test executable cortex file', () => { afterAll(function () { Object.defineProperty(process, 'platform', { @@ -37,6 +47,37 @@ describe('test executable cortex file', () => { }) }) + it('executes on MacOS', () => { + const mockProcess = { + on: jest.fn((event, callback) => { + if (event === 'message') { + callback('noavx') + } + }), + send: jest.fn(), + } + Object.defineProperty(process, 'platform', { + value: 'darwin', + }) + Object.defineProperty(process, 'arch', { + value: 'arm64', + }) + expect(executableCortexFile(testSettings)).toEqual( + expect.objectContaining({ + enginePath: expect.stringContaining('shared'), + executablePath: + originalPlatform === 'darwin' + ? expect.stringContaining(`cortex-server`) + : expect.anything(), + cudaVisibleDevices: '', + vkVisibleDevices: '', + }) + ) + + mockFork.mockReturnValue(mockProcess) + expect(engineVariant(testSettings)).resolves.toEqual('mac-arm64') + }) + it('executes on MacOS', () => { Object.defineProperty(process, 'platform', { value: 'darwin', @@ -44,25 +85,23 @@ describe('test executable cortex file', () => { Object.defineProperty(process, 'arch', { value: 'arm64', }) - expect(executableCortexFile(testSettings)).toEqual( - expect.objectContaining({ - enginePath: expect.stringContaining(`arm64`), - binPath: expect.stringContaining(`bin`), - executablePath: - originalPlatform === 'darwin' - ? 
expect.stringContaining(`cortex-server`) - : expect.anything(), - cudaVisibleDevices: '', - vkVisibleDevices: '', - }) - ) + + const mockProcess = { + on: jest.fn((event, callback) => { + if (event === 'message') { + callback('noavx') + } + }), + send: jest.fn(), + } + mockFork.mockReturnValue(mockProcess) Object.defineProperty(process, 'arch', { value: 'x64', }) + expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`x64`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`cortex-server`) @@ -71,6 +110,7 @@ describe('test executable cortex file', () => { vkVisibleDevices: '', }) ) + expect(engineVariant(testSettings)).resolves.toEqual('mac-amd64') }) it('executes on Windows CPU', () => { @@ -81,16 +121,25 @@ describe('test executable cortex file', () => { ...testSettings, run_mode: 'cpu', } - mockCpuInfo.mockReturnValue(['avx']) + const mockProcess = { + on: jest.fn((event, callback) => { + if (event === 'message') { + callback('avx') + } + }), + send: jest.fn(), + } + mockFork.mockReturnValue(mockProcess) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`avx`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) ) + expect(engineVariant()).resolves.toEqual('windows-amd64-avx') }) it('executes on Windows Cuda 11', () => { @@ -117,16 +166,27 @@ describe('test executable cortex file', () => { }, ], } - mockCpuInfo.mockReturnValue(['avx2']) + + const mockProcess = { + on: jest.fn((event, callback) => { + if (event === 'message') { + callback('avx2') + } + }), + send: jest.fn(), + } + mockFork.mockReturnValue(mockProcess) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`avx2-cuda-11-7`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).resolves.toEqual( + 'windows-amd64-avx2-cuda-11-7' + ) }) it('executes on Windows Cuda 12', () => { @@ -153,16 +213,36 @@ describe('test executable cortex file', () => { }, ], } - mockCpuInfo.mockReturnValue(['noavx']) + mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback('noavx') + } + }), + send: jest.fn(), + }) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`noavx-cuda-12-0`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).resolves.toEqual( + 'windows-amd64-noavx-cuda-12-0' + ) + mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback('avx512') + } + }), + send: jest.fn(), + }) + expect(engineVariant(settings)).resolves.toEqual( + 'windows-amd64-avx2-cuda-12-0' + ) }) it('executes on Linux CPU', () => { @@ -173,15 +253,23 @@ describe('test executable cortex file', () => { ...testSettings, run_mode: 'cpu', } - mockCpuInfo.mockReturnValue(['noavx']) + 
mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback('noavx') + } + }), + send: jest.fn(), + }) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`noavx`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '', vkVisibleDevices: '', }) ) + expect(engineVariant()).resolves.toEqual('linux-amd64-noavx') }) it('executes on Linux Cuda 11', () => { @@ -208,15 +296,25 @@ describe('test executable cortex file', () => { }, ], } + + mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback('avx512') + } + }), + send: jest.fn(), + }) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-11-7`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).resolves.toBe('linux-amd64-avx2-cuda-11-7') }) it('executes on Linux Cuda 12', () => { @@ -243,15 +341,25 @@ describe('test executable cortex file', () => { }, ], } + mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback('avx2') + } + }), + send: jest.fn(), + }) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-12-0`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).resolves.toEqual( + 'linux-amd64-avx2-cuda-12-0' + ) }) // Generate test for different cpu instructions on Linux @@ -266,18 +374,27 @@ describe('test executable cortex file', () => { const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { - mockCpuInfo.mockReturnValue([instruction]) + mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback(instruction) + } + }), + send: jest.fn(), + }) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(instruction), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '', vkVisibleDevices: '', }) ) + expect(engineVariant(settings)).resolves.toEqual( + `linux-amd64-${instruction}` + ) }) }) // Generate test for different cpu instructions on Windows @@ -291,16 +408,25 @@ describe('test executable cortex file', () => { } const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { - mockCpuInfo.mockReturnValue([instruction]) + mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback(instruction) + } + }), + send: jest.fn(), + }) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(instruction), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) ) + expect(engineVariant(settings)).resolves.toEqual( + `windows-amd64-${instruction}` 
+ ) }) }) @@ -331,16 +457,25 @@ describe('test executable cortex file', () => { } const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { - mockCpuInfo.mockReturnValue([instruction]) + mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback(instruction) + } + }), + send: jest.fn(), + }) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-12-0`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).resolves.toEqual( + `windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0` + ) }) }) @@ -371,16 +506,25 @@ describe('test executable cortex file', () => { ], } cpuInstructions.forEach((instruction) => { - mockCpuInfo.mockReturnValue([instruction]) + mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback(instruction) + } + }), + send: jest.fn(), + }) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-12-0`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).resolves.toEqual( + `linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0` + ) }) }) @@ -412,16 +556,23 @@ describe('test executable cortex file', () => { ], } cpuInstructions.forEach((instruction) => { - mockCpuInfo.mockReturnValue([instruction]) + mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback(instruction) + } + }), + send: jest.fn(), + }) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`vulkan`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).resolves.toEqual(`linux-amd64-vulkan`) }) }) @@ -439,11 +590,17 @@ describe('test executable cortex file', () => { ...testSettings, run_mode: 'cpu', } - mockCpuInfo.mockReturnValue([]) + mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback('noavx') + } + }), + send: jest.fn(), + }) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`x64`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: originalPlatform === 'darwin' ? 
expect.stringContaining(`cortex-server`) diff --git a/extensions/inference-cortex-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts index 74ffb48c6..0b091d464 100644 --- a/extensions/inference-cortex-extension/src/node/execute.ts +++ b/extensions/inference-cortex-extension/src/node/execute.ts @@ -1,10 +1,9 @@ -import { GpuSetting } from '@janhq/core' import * as path from 'path' -import { cpuInfo } from 'cpu-instructions' +import { GpuSetting, appResourcePath, log } from '@janhq/core/node' +import { fork } from 'child_process' export interface CortexExecutableOptions { enginePath: string - binPath: string executablePath: string cudaVisibleDevices: string vkVisibleDevices: string @@ -21,11 +20,7 @@ const gpuRunMode = (settings?: GpuSetting): string => { if (!settings) return '' - return settings.vulkan === true - ? 'vulkan' - : settings.run_mode === 'cpu' - ? '' - : 'cuda' + return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda' } /** @@ -34,12 +29,12 @@ const gpuRunMode = (settings?: GpuSetting): string => { */ const os = (): string => { return process.platform === 'win32' - ? 'win' + ? 'windows-amd64' : process.platform === 'darwin' ? process.arch === 'arm64' - ? 'arm64' - : 'x64' - : 'linux' + ? 'mac-arm64' + : 'mac-amd64' + : 'linux-amd64' } /** @@ -57,7 +52,9 @@ const extension = (): '.exe' | '' => { */ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => { const isUsingCuda = - settings?.vulkan !== true && settings?.run_mode === 'gpu' && os() !== 'mac' + settings?.vulkan !== true && + settings?.run_mode === 'gpu' && + !os().includes('mac') if (!isUsingCuda) return undefined return settings?.cuda?.version === '11' ? '11-7' : '12-0' @@ -67,48 +64,74 @@ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => { * The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'. * @returns */ -const cpuInstructions = (): string => { +const cpuInstructions = async (): Promise => { if (process.platform === 'darwin') return '' - return cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512') - ? 'avx512' - : cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX2') - ? 'avx2' - : cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX') - ? 'avx' - : 'noavx' + + const child = fork(path.join(__dirname, './cpuInfo.js')) // Path to the child process file + + return new Promise((resolve, reject) => { + child.on('message', (cpuInfo?: string) => { + resolve(cpuInfo ?? 'noavx') + child.kill() // Kill the child process after receiving the result + }) + + child.on('error', (err) => { + resolve('noavx') + child.kill() + }) + + child.on('exit', (code) => { + if (code !== 0) { + resolve('noavx') + child.kill() + } + }) + }) } /** - * Find which executable file to run based on the current platform. - * @returns The name of the executable file to run. + * The executable options for the cortex.cpp extension. */ export const executableCortexFile = ( gpuSetting?: GpuSetting ): CortexExecutableOptions => { - const cpuInstruction = cpuInstructions() - let engineFolder = gpuSetting?.vulkan - ? 'vulkan' - : process.platform === 'darwin' - ? os() - : [ - gpuRunMode(gpuSetting) !== 'cuda' || - cpuInstruction === 'avx2' || cpuInstruction === 'avx512' - ? cpuInstruction - : 'noavx', - gpuRunMode(gpuSetting), - cudaVersion(gpuSetting), - ] - .filter((e) => !!e) - .join('-') let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? 
'' let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let binaryName = `cortex-server${extension()}` const binPath = path.join(__dirname, '..', 'bin') return { - enginePath: path.join(binPath, engineFolder), + enginePath: path.join(appResourcePath(), 'shared'), executablePath: path.join(binPath, binaryName), - binPath: binPath, cudaVisibleDevices, vkVisibleDevices, } } + +/** + * Find which variant to run based on the current platform. + */ +export const engineVariant = async ( + gpuSetting?: GpuSetting +): Promise => { + const cpuInstruction = await cpuInstructions() + log(`[CORTEX]: CPU instruction: ${cpuInstruction}`) + let engineVariant = [ + os(), + gpuSetting?.vulkan + ? 'vulkan' + : gpuRunMode(gpuSetting) !== 'cuda' + ? // CPU mode - support all variants + cpuInstruction + : // GPU mode - packaged CUDA variants of avx2 and noavx + cpuInstruction === 'avx2' || cpuInstruction === 'avx512' + ? 'avx2' + : 'noavx', + gpuRunMode(gpuSetting), + cudaVersion(gpuSetting), + ] + .filter((e) => !!e) + .join('-') + + log(`[CORTEX]: Engine variant: ${engineVariant}`) + return engineVariant +} diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts index 3816605d2..a13bf6028 100644 --- a/extensions/inference-cortex-extension/src/node/index.ts +++ b/extensions/inference-cortex-extension/src/node/index.ts @@ -1,8 +1,7 @@ import path from 'path' import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node' -import { executableCortexFile } from './execute' +import { engineVariant, executableCortexFile } from './execute' import { ProcessWatchdog } from './watchdog' -import { appResourcePath } from '@janhq/core/node' // The HOST address to use for the Nitro subprocess const LOCAL_PORT = '39291' @@ -20,9 +19,9 @@ function run(systemInfo?: SystemInformation): Promise { // If ngl is not set or equal to 0, run on CPU with correct instructions systemInfo?.gpuSetting ? 
{ - ...systemInfo.gpuSetting, - run_mode: systemInfo.gpuSetting.run_mode, - } + ...systemInfo.gpuSetting, + run_mode: systemInfo.gpuSetting.run_mode, + } : undefined ) @@ -30,16 +29,13 @@ function run(systemInfo?: SystemInformation): Promise { log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`) log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`) - addEnvPaths(path.join(appResourcePath(), 'shared')) - addEnvPaths(executableOptions.binPath) addEnvPaths(executableOptions.enginePath) - // Add the cortex.llamacpp path to the PATH and LD_LIBRARY_PATH - // This is required for the cortex engine to run for now since dlls are not moved to the root - addEnvPaths( - path.join(executableOptions.enginePath, 'engines', 'cortex.llamacpp') - ) const dataFolderPath = getJanDataFolderPath() + if (watchdog) { + watchdog.terminate() + } + watchdog = new ProcessWatchdog( executableOptions.executablePath, [ @@ -81,17 +77,12 @@ function dispose() { function addEnvPaths(dest: string) { // Add engine path to the PATH and LD_LIBRARY_PATH if (process.platform === 'win32') { - process.env.PATH = (process.env.PATH || '').concat( - path.delimiter, - dest, - ) - log(`[CORTEX] PATH: ${process.env.PATH}`) + process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest) } else { process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat( path.delimiter, - dest, + dest ) - log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`) } } @@ -105,4 +96,5 @@ export interface CortexProcessInfo { export default { run, dispose, + engineVariant, } diff --git a/extensions/inference-openai-extension/package.json b/extensions/inference-openai-extension/package.json index 50fe12349..9700383d6 100644 --- a/extensions/inference-openai-extension/package.json +++ b/extensions/inference-openai-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-openai-extension", "productName": "OpenAI Inference Engine", - "version": "1.0.3", + "version": "1.0.4", "description": "This extension enables OpenAI chat completion API calls", "main": "dist/index.js", "module": "dist/module.js", diff --git a/extensions/inference-openai-extension/resources/models.json b/extensions/inference-openai-extension/resources/models.json index 124e123b9..a34bc5460 100644 --- a/extensions/inference-openai-extension/resources/models.json +++ b/extensions/inference-openai-extension/resources/models.json @@ -97,11 +97,10 @@ "format": "api", "settings": {}, "parameters": { - "max_tokens": 4096, - "temperature": 0.7, - "top_p": 0.95, + "temperature": 1, + "top_p": 1, "stream": true, - "stop": [], + "max_tokens": 32768, "frequency_penalty": 0, "presence_penalty": 0 }, @@ -125,11 +124,10 @@ "format": "api", "settings": {}, "parameters": { - "max_tokens": 4096, - "temperature": 0.7, - "top_p": 0.95, + "temperature": 1, + "top_p": 1, + "max_tokens": 65536, "stream": true, - "stop": [], "frequency_penalty": 0, "presence_penalty": 0 }, diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts index 64880b678..18bc4e0aa 100644 --- a/extensions/inference-openai-extension/src/index.ts +++ b/extensions/inference-openai-extension/src/index.ts @@ -76,11 +76,10 @@ export default class JanInferenceOpenAIExtension extends RemoteOAIEngine { transformPayload = (payload: OpenAIPayloadType): OpenAIPayloadType => { // Transform the payload for preview models if (this.previewModels.includes(payload.model)) { - const { max_tokens, temperature, top_p, stop, 
...params } = payload + const { max_tokens, stop, ...params } = payload return { ...params, max_completion_tokens: max_tokens, - stream: false // o1 only support stream = false } } // Pass through for non-preview models diff --git a/extensions/inference-openrouter-extension/resources/models.json b/extensions/inference-openrouter-extension/resources/models.json index d89c07e5a..31dea8734 100644 --- a/extensions/inference-openrouter-extension/resources/models.json +++ b/extensions/inference-openrouter-extension/resources/models.json @@ -1,4 +1,4 @@ - [ +[ { "sources": [ { @@ -13,7 +13,7 @@ "format": "api", "settings": {}, "parameters": { - "max_tokens": 1024, + "max_tokens": 128000, "temperature": 0.7, "top_p": 0.95, "frequency_penalty": 0, diff --git a/extensions/inference-openrouter-extension/src/index.ts b/extensions/inference-openrouter-extension/src/index.ts index 75d1188a8..1b2cd014d 100644 --- a/extensions/inference-openrouter-extension/src/index.ts +++ b/extensions/inference-openrouter-extension/src/index.ts @@ -83,6 +83,6 @@ export default class JanInferenceOpenRouterExtension extends RemoteOAIEngine { transformPayload = (payload: PayloadType) => ({ ...payload, - model: this.model, + model: payload.model !== 'open-router-auto' ? payload.model : this.model, }) } diff --git a/extensions/model-extension/package.json b/extensions/model-extension/package.json index bd834454a..ca563ff9f 100644 --- a/extensions/model-extension/package.json +++ b/extensions/model-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/model-extension", "productName": "Model Management", - "version": "1.0.34", + "version": "1.0.35", "description": "Model Management Extension provides model exploration and seamless downloads", "main": "dist/index.js", "author": "Jan ", diff --git a/extensions/model-extension/src/cortex.ts b/extensions/model-extension/src/cortex.ts index 7a65e8e3f..26316fbbc 100644 --- a/extensions/model-extension/src/cortex.ts +++ b/extensions/model-extension/src/cortex.ts @@ -1,6 +1,6 @@ import PQueue from 'p-queue' import ky from 'ky' -import { extractModelLoadParams, Model } from '@janhq/core' +import { extractModelLoadParams, Model } from '@janhq/core' import { extractInferenceParams } from '@janhq/core' /** * cortex.cpp Model APIs interface @@ -18,6 +18,7 @@ interface ICortexAPI { deleteModel(model: string): Promise updateModel(model: object): Promise cancelModelPull(model: string): Promise + configs(body: { [key: string]: any }): Promise } type ModelList = { @@ -52,7 +53,7 @@ export class CortexAPI implements ICortexAPI { */ getModels(): Promise { return this.queue - .add(() => ky.get(`${API_URL}/models`).json()) + .add(() => ky.get(`${API_URL}/v1/models`).json()) .then((e) => typeof e === 'object' ? 
e.data.map((e) => this.transformModel(e)) : [] ) @@ -104,7 +105,7 @@ export class CortexAPI implements ICortexAPI { */ deleteModel(model: string): Promise { return this.queue.add(() => - ky.delete(`${API_URL}/models/${model}`).json().then() + ky.delete(`${API_URL}/v1/models/${model}`).json().then() ) } @@ -130,7 +131,7 @@ export class CortexAPI implements ICortexAPI { cancelModelPull(model: string): Promise { return this.queue.add(() => ky - .delete(`${API_URL}/models/pull`, { json: { taskId: model } }) + .delete(`${API_URL}/v1/models/pull`, { json: { taskId: model } }) .json() .then() ) @@ -142,7 +143,7 @@ export class CortexAPI implements ICortexAPI { */ async getModelStatus(model: string): Promise { return this.queue - .add(() => ky.get(`${API_URL}/models/status/${model}`)) + .add(() => ky.get(`${API_URL}/v1/models/status/${model}`)) .then((e) => true) .catch(() => false) } @@ -155,13 +156,24 @@ export class CortexAPI implements ICortexAPI { return ky .get(`${API_URL}/healthz`, { retry: { - limit: 10, + limit: 20, + delay: () => 500, methods: ['get'], }, }) .then(() => {}) } + /** + * Configure model pull options + * @param body + */ + configs(body: { [key: string]: any }): Promise { + return this.queue.add(() => + ky.patch(`${API_URL}/v1/configs`, { json: body }).then(() => {}) + ) + } + /** * TRansform model to the expected format (e.g. parameters, settings, metadata) * @param model diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts index b3ad2a012..e29084bc2 100644 --- a/extensions/model-extension/src/index.ts +++ b/extensions/model-extension/src/index.ts @@ -20,11 +20,8 @@ import { deleteModelFiles } from './legacy/delete' declare const SETTINGS: Array -/** - * Extension enum - */ -enum ExtensionEnum { - downloadedModels = 'downloadedModels', +export enum Settings { + huggingfaceToken = 'hugging-face-access-token', } /** @@ -40,15 +37,29 @@ export default class JanModelExtension extends ModelExtension { async onLoad() { this.registerSettings(SETTINGS) - // Try get models from cortex.cpp - this.getModels().then((models) => { - this.registerModels(models) - }) + // Configure huggingface token if available + const huggingfaceToken = await this.getSetting( + Settings.huggingfaceToken, + undefined + ) + if (huggingfaceToken) + this.cortexAPI.configs({ huggingface_token: huggingfaceToken }) // Listen to app download events this.handleDesktopEvents() } + /** + * Subscribe to settings update and make change accordingly + * @param key + * @param value + */ + onSettingUpdate(key: string, value: T): void { + if (key === Settings.huggingfaceToken) { + this.cortexAPI.configs({ huggingface_token: value }) + } + } + /** * Called when the extension is unloaded. * @override @@ -127,55 +138,43 @@ export default class JanModelExtension extends ModelExtension { * @returns A Promise that resolves with an array of all models. 
*/ async getModels(): Promise { - /** - * In this action, if return empty array right away - * it would reset app cache and app will not function properly - * should compare and try import - */ - let currentModels: Model[] = [] - /** * Legacy models should be supported */ let legacyModels = await scanModelsFolder() - try { - if (!localStorage.getItem(ExtensionEnum.downloadedModels)) { - // Updated from an older version than 0.5.5 - // Scan through the models folder and import them (Legacy flow) - // Return models immediately - currentModels = legacyModels - } else { - currentModels = JSON.parse( - localStorage.getItem(ExtensionEnum.downloadedModels) - ) as Model[] - } - } catch (e) { - currentModels = [] - console.error(e) - } - /** * Here we are filtering out the models that are not imported * and are not using llama.cpp engine */ - var toImportModels = currentModels.filter( + var toImportModels = legacyModels.filter( (e) => e.engine === InferenceEngine.nitro ) - await this.cortexAPI.getModels().then((models) => { - const existingIds = models.map((e) => e.id) - toImportModels = toImportModels.filter( - (e: Model) => !existingIds.includes(e.id) && !e.settings?.vision_model + /** + * Fetch models from cortex.cpp + */ + var fetchedModels = await this.cortexAPI.getModels().catch(() => []) + + // Checking if there are models to import + const existingIds = fetchedModels.map((e) => e.id) + toImportModels = toImportModels.filter( + (e: Model) => !existingIds.includes(e.id) && !e.settings?.vision_model + ) + + /** + * There is no model to import + * just return fetched models + */ + if (!toImportModels.length) + return fetchedModels.concat( + legacyModels.filter((e) => !fetchedModels.some((x) => x.id === e.id)) ) - }) console.log('To import models:', toImportModels.length) /** * There are models to import - * do not return models from cortex.cpp yet - * otherwise it will reset the app cache - * */ + */ if (toImportModels.length > 0) { // Import models await Promise.all( @@ -193,17 +192,19 @@ export default class JanModelExtension extends ModelExtension { ]) // Copied models : model.sources[0].url, // Symlink models, model.name - ).then((e) => { - this.updateModel({ - id: model.id, - ...model.settings, - ...model.parameters, - } as Partial) - }) + ) + .then((e) => { + this.updateModel({ + id: model.id, + ...model.settings, + ...model.parameters, + } as Partial) + }) + .catch((e) => { + console.debug(e) + }) }) ) - - return currentModels } /** @@ -252,6 +253,13 @@ export default class JanModelExtension extends ModelExtension { return this.cortexAPI.getModelStatus(model) } + /** + * Configure pull options such as proxy, headers, etc. 
+ */ + async configurePullOptions(options: { [key: string]: any }): Promise { + return this.cortexAPI.configs(options).catch((e) => console.debug(e)) + } + /** * Handle download state from main app */ diff --git a/extensions/model-extension/src/legacy/delete.ts b/extensions/model-extension/src/legacy/delete.ts index 5288e30ee..43fa56d69 100644 --- a/extensions/model-extension/src/legacy/delete.ts +++ b/extensions/model-extension/src/legacy/delete.ts @@ -1,10 +1,12 @@ -import { fs, joinPath } from '@janhq/core' +import { dirName, fs } from '@janhq/core' +import { scanModelsFolder } from './model-json' export const deleteModelFiles = async (id: string) => { try { - const dirPath = await joinPath(['file://models', id]) + const models = await scanModelsFolder() + const dirPath = models.find((e) => e.id === id)?.file_path // remove model folder directory - await fs.rm(dirPath) + if (dirPath) await fs.rm(await dirName(dirPath)) } catch (err) { console.error(err) } diff --git a/extensions/model-extension/src/legacy/model-json.ts b/extensions/model-extension/src/legacy/model-json.ts index 3cad6014b..03560cde2 100644 --- a/extensions/model-extension/src/legacy/model-json.ts +++ b/extensions/model-extension/src/legacy/model-json.ts @@ -12,7 +12,9 @@ const LocalEngines = [ * Scan through models folder and return downloaded models * @returns */ -export const scanModelsFolder = async (): Promise => { +export const scanModelsFolder = async (): Promise< + (Model & { file_path?: string })[] +> => { const _homeDir = 'file://models' try { if (!(await fs.existsSync(_homeDir))) { @@ -37,7 +39,7 @@ export const scanModelsFolder = async (): Promise => { const jsonPath = await getModelJsonPath(folderFullPath) - if (await fs.existsSync(jsonPath)) { + if (jsonPath && (await fs.existsSync(jsonPath))) { // if we have the model.json file, read it let model = await fs.readFileSync(jsonPath, 'utf-8') @@ -83,7 +85,10 @@ export const scanModelsFolder = async (): Promise => { file.toLowerCase().endsWith('.gguf') || // GGUF file.toLowerCase().endsWith('.engine') // Tensort-LLM ) - })?.length >= (model.engine === InferenceEngine.nitro_tensorrt_llm ? 1 : (model.sources?.length ?? 1)) + })?.length >= + (model.engine === InferenceEngine.nitro_tensorrt_llm + ? 1 + : (model.sources?.length ?? 
1)) ) }) diff --git a/extensions/monitoring-extension/src/index.ts b/extensions/monitoring-extension/src/index.ts index 1d21fde77..eca71326e 100644 --- a/extensions/monitoring-extension/src/index.ts +++ b/extensions/monitoring-extension/src/index.ts @@ -1,7 +1,9 @@ import { + AppConfigurationEventName, GpuSetting, MonitoringExtension, OperatingSystemInfo, + events, executeOnMain, } from '@janhq/core' @@ -37,6 +39,7 @@ export default class JanMonitoringExtension extends MonitoringExtension { // Attempt to fetch nvidia info await executeOnMain(NODE, 'updateNvidiaInfo') + events.emit(AppConfigurationEventName.OnConfigurationUpdate, {}) } onSettingUpdate(key: string, value: T): void { diff --git a/extensions/monitoring-extension/src/node/index.ts b/extensions/monitoring-extension/src/node/index.ts index 980ee75d1..e32f85082 100644 --- a/extensions/monitoring-extension/src/node/index.ts +++ b/extensions/monitoring-extension/src/node/index.ts @@ -259,15 +259,15 @@ const updateGpuInfo = async () => data.gpu_highest_vram = highestVramId } else { data.gpus = [] - data.gpu_highest_vram = '' + data.gpu_highest_vram = undefined } if (!data.gpus_in_use || data.gpus_in_use.length === 0) { - data.gpus_in_use = [data.gpu_highest_vram] + data.gpus_in_use = data.gpu_highest_vram ? [data.gpu_highest_vram].filter(e => !!e) : [] } data = await updateCudaExistence(data) - console.log(data) + console.log('[MONITORING]::Cuda info: ', data) writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2)) log(`[APP]::${JSON.stringify(data)}`) resolve({}) @@ -344,7 +344,7 @@ const updateCudaExistence = async ( data.cuda.version = match[1] } } - console.log(data) + console.log('[MONITORING]::Finalized cuda info update: ', data) resolve() }) }) diff --git a/joi/src/core/Slider/index.tsx b/joi/src/core/Slider/index.tsx index 40e0c3977..7f8c6cb89 100644 --- a/joi/src/core/Slider/index.tsx +++ b/joi/src/core/Slider/index.tsx @@ -1,5 +1,6 @@ import React from 'react' import * as SliderPrimitive from '@radix-ui/react-slider' +import { twMerge } from 'tailwind-merge' import './styles.scss' @@ -25,7 +26,7 @@ const Slider = ({ disabled, }: Props) => ( - - - + + diff --git a/web/containers/EngineSetting/index.tsx b/web/containers/EngineSetting/index.tsx index acbd507ce..0ae2929bf 100644 --- a/web/containers/EngineSetting/index.tsx +++ b/web/containers/EngineSetting/index.tsx @@ -4,7 +4,10 @@ import SettingComponentBuilder from '@/containers/ModelSetting/SettingComponent' type Props = { componentData: SettingComponentProps[] - onValueChanged: (key: string, value: string | number | boolean) => void + onValueChanged: ( + key: string, + value: string | number | boolean | string[] + ) => void disabled?: boolean } diff --git a/web/containers/ErrorMessage/index.test.tsx b/web/containers/ErrorMessage/index.test.tsx index d2ae5aa81..306a80e32 100644 --- a/web/containers/ErrorMessage/index.test.tsx +++ b/web/containers/ErrorMessage/index.test.tsx @@ -63,9 +63,6 @@ describe('ErrorMessage Component', () => { render() - expect( - screen.getByText('Apologies, something’s amiss!') - ).toBeInTheDocument() expect(screen.getByText('troubleshooting assistance')).toBeInTheDocument() }) diff --git a/web/containers/ErrorMessage/index.tsx b/web/containers/ErrorMessage/index.tsx index 18558c1d8..532f02259 100644 --- a/web/containers/ErrorMessage/index.tsx +++ b/web/containers/ErrorMessage/index.tsx @@ -27,11 +27,8 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => { const getErrorTitle = () => { switch (message.error_code) { - case 
ErrorCode.Unknown:
-        return 'Apologies, something’s amiss!'
       case ErrorCode.InvalidApiKey:
       case ErrorCode.AuthenticationError:
-      case ErrorCode.InvalidRequestError:
         return (
           Invalid API key. Please check your API key from{' '}
         )
       default:
         return (
-          <>
+
           {message.content[0]?.text?.value && ( )}
-
+
         )
     }
   }

   return (
-
+
       {message.status === MessageStatus.Error && (
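Aside: the ErrorMessage hunk above drops the generic 'Apologies' title and the InvalidRequestError case, so only authentication-style codes keep a dedicated title. A minimal TypeScript sketch of the narrowed mapping; apiKeyHelp is a hypothetical stand-in for the stripped JSX, not Jan's actual markup.

import { ErrorCode } from '@janhq/core'

// Hypothetical stand-in for the JSX help text whose tags were lost above.
const apiKeyHelp = 'Invalid API key. Please check your API key and try again.'

// After this change, only authentication-style codes get a dedicated
// title; every other error falls through to the raw message body.
const getErrorTitle = (code?: ErrorCode): string | undefined => {
  switch (code) {
    case ErrorCode.InvalidApiKey:
    case ErrorCode.AuthenticationError:
      return apiKeyHelp
    default:
      return undefined
  }
}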
{
-  const [mainViewState, setMainViewState] = useAtom(mainViewStateAtom)
+  const setMainViewState = useSetAtom(mainViewStateAtom)
   const importModelStage = useAtomValue(getImportModelStageAtom)
   const reduceTransparent = useAtomValue(reduceTransparentAtom)
@@ -68,24 +66,7 @@ const BaseLayout = () => {
-
-
-
-
-
-
+      {importModelStage === 'SELECTING_MODEL' && }
+      {importModelStage === 'MODEL_SELECTED' && }
diff --git a/web/containers/ListContainer/index.tsx b/web/containers/ListContainer/index.tsx
index bd650e315..44e5b2527 100644
--- a/web/containers/ListContainer/index.tsx
+++ b/web/containers/ListContainer/index.tsx
@@ -1,15 +1,30 @@
-import { ReactNode, useCallback, useEffect, useRef } from 'react'
+import { PropsWithChildren, useCallback, useEffect, useRef } from 'react'

 import { ScrollArea } from '@janhq/joi'

-type Props = {
-  children: ReactNode
-}
+import { useAtomValue } from 'jotai'

-const ListContainer = ({ children }: Props) => {
+import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
+
+const ListContainer = ({ children }: PropsWithChildren) => {
   const listRef = useRef(null)
   const prevScrollTop = useRef(0)
   const isUserManuallyScrollingUp = useRef(false)
+  const activeThread = useAtomValue(activeThreadAtom)
+  const prevActiveThread = useRef(activeThread)
+
+  // Handle active thread changes
+  useEffect(() => {
+    if (prevActiveThread.current?.id !== activeThread?.id) {
+      isUserManuallyScrollingUp.current = false
+      const scrollHeight = listRef.current?.scrollHeight ?? 0
+      listRef.current?.scrollTo({
+        top: scrollHeight,
+        behavior: 'instant',
+      })
+      prevActiveThread.current = activeThread // Update the previous active thread reference
+    }
+  }, [activeThread])

   const handleScroll = useCallback((event: React.UIEvent) => {
     const currentScrollTop = event.currentTarget.scrollTop
@@ -25,6 +40,11 @@
       isUserManuallyScrollingUp.current = false
     }
   }
+
+    if (isUserManuallyScrollingUp.current === true) {
+      event.preventDefault()
+      event.stopPropagation()
+    }
     prevScrollTop.current = currentScrollTop
   }, [])
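The ListContainer hunk above pins the list to the bottom whenever the active thread changes, while a ref tracks whether the user has scrolled up so auto-follow can pause. A condensed sketch of that pattern as a standalone hook; the hook name is illustrative, not Jan API.

import { useEffect, useRef } from 'react'
import type { RefObject } from 'react'

// Scroll a list to its bottom whenever the active thread id changes,
// and reset the manual-scroll flag so auto-follow resumes.
function useScrollToBottomOnSwitch(
  listRef: RefObject<HTMLDivElement>,
  threadId?: string
) {
  const prevThreadId = useRef(threadId)
  const isUserManuallyScrollingUp = useRef(false)

  useEffect(() => {
    if (prevThreadId.current === threadId) return
    isUserManuallyScrollingUp.current = false
    const el = listRef.current
    // 'instant' skips smooth scrolling so the jump is not animated
    if (el) el.scrollTo({ top: el.scrollHeight, behavior: 'instant' })
    prevThreadId.current = threadId
  }, [listRef, threadId])

  return isUserManuallyScrollingUp
}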
diff --git a/web/containers/MainViewContainer/index.tsx b/web/containers/MainViewContainer/index.tsx
index 4f3b4986a..ba7f87fd2 100644
--- a/web/containers/MainViewContainer/index.tsx
+++ b/web/containers/MainViewContainer/index.tsx
@@ -1,5 +1,10 @@
+import { memo } from 'react'
+
+import { motion as m } from 'framer-motion'
 import { useAtomValue } from 'jotai'

+import { twMerge } from 'tailwind-merge'
+
 import { MainViewState } from '@/constants/screens'

 import HubScreen from '@/screens/Hub'
@@ -31,7 +36,26 @@ const MainViewContainer = () => {
       break
   }

-  return children
+  return (
+
+
+        {children}
+
+
+  )
 }

-export default MainViewContainer
+export default memo(MainViewContainer)
diff --git a/web/containers/ModelConfigInput/index.test.tsx b/web/containers/ModelConfigInput/index.test.tsx
index b92bdfcb2..cf9cb9da3 100644
--- a/web/containers/ModelConfigInput/index.test.tsx
+++ b/web/containers/ModelConfigInput/index.test.tsx
@@ -2,7 +2,6 @@ import '@testing-library/jest-dom'
 import React from 'react'
 import { render, fireEvent } from '@testing-library/react'
 import ModelConfigInput from './index'
-import { Tooltip } from '@janhq/joi'

 // Mocking the Tooltip component to simplify testing
 jest.mock('@janhq/joi', () => ({
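The MainViewContainer hunk memoizes the component and, judging by the framer-motion import, fades the switched-in screen; the exact JSX was stripped in transit. A minimal sketch of that combination, assuming a simple opacity fade.

import { memo, type ReactNode } from 'react'
import { motion as m } from 'framer-motion'

type Props = { children: ReactNode }

// Fade the newly selected screen in. memo() keeps this container from
// re-rendering when the parent updates without the view state changing.
const AnimatedView = ({ children }: Props) => (
  <m.div
    initial={{ opacity: 0 }}
    animate={{ opacity: 1 }}
    transition={{ duration: 0.25 }}
  >
    {children}
  </m.div>
)

export default memo(AnimatedView)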
diff --git a/web/containers/ModelConfigInput/index.tsx b/web/containers/ModelConfigInput/index.tsx
index f0e6ea1f2..e67080df2 100644
--- a/web/containers/ModelConfigInput/index.tsx
+++ b/web/containers/ModelConfigInput/index.tsx
@@ -19,28 +19,30 @@ const ModelConfigInput = ({
   title,
   description,
   placeholder,
   onValueChanged,
-}: Props) => (
-
-
-      {title}
-
-
-      }
-      content={description}
+}: Props) => {
+  return (
+
+
+      {title}
+
+
+      }
+      content={description}
+      />
+