diff --git a/.github/workflows/jan-electron-build-beta.yml b/.github/workflows/jan-electron-build-beta.yml index 9cae31d67..4f9886010 100644 --- a/.github/workflows/jan-electron-build-beta.yml +++ b/.github/workflows/jan-electron-build-beta.yml @@ -9,31 +9,6 @@ jobs: get-update-version: uses: ./.github/workflows/template-get-update-version.yml - create-draft-release: - runs-on: ubuntu-latest - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') - outputs: - upload_url: ${{ steps.create_release.outputs.upload_url }} - version: ${{ steps.get_version.outputs.version }} - permissions: - contents: write - steps: - - name: Extract tag name without v prefix - id: get_version - run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV && echo "::set-output name=version::${GITHUB_REF#refs/tags/v}" - env: - GITHUB_REF: ${{ github.ref }} - - name: Create Draft Release - id: create_release - uses: softprops/action-gh-release@v2 - with: - tag_name: ${{ github.ref_name }} - token: ${{ secrets.GITHUB_TOKEN }} - name: "${{ env.VERSION }}" - draft: true - prerelease: false - generate_release_notes: true - build-macos: uses: ./.github/workflows/template-build-macos.yml secrets: inherit @@ -65,7 +40,7 @@ jobs: beta: true sync-temp-to-latest: - needs: [build-macos, create-draft-release, build-windows-x64, build-linux-x64] + needs: [build-macos, build-windows-x64, build-linux-x64] runs-on: ubuntu-latest permissions: contents: write @@ -82,19 +57,15 @@ jobs: AWS_DEFAULT_REGION: ${{ secrets.DELTA_AWS_REGION }} AWS_EC2_METADATA_DISABLED: "true" - - name: set release to prerelease - run: | - gh release edit v${{ needs.create-draft-release.outputs.version }} --draft=false --prerelease - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - noti-discord-and-update-url-readme: - needs: [build-macos, create-draft-release, build-windows-x64, build-linux-x64, sync-temp-to-latest] + needs: [build-macos, get-update-version, build-windows-x64, build-linux-x64, sync-temp-to-latest] runs-on: ubuntu-latest steps: - name: Set version to environment variable run: | - echo "VERSION=${{ needs.create-draft-release.outputs.version }}" >> $GITHUB_ENV + VERSION=${{ needs.get-update-version.outputs.new_version }} + VERSION="${VERSION#v}" + echo "VERSION=$VERSION" >> $GITHUB_ENV - name: Notify Discord uses: Ilshidur/action-discord@master @@ -105,6 +76,5 @@ jobs: - macOS Universal: https://delta.jan.ai/beta/jan-beta-mac-universal-{{ VERSION }}.dmg - Linux Deb: https://delta.jan.ai/beta/jan-beta-linux-amd64-{{ VERSION }}.deb - Linux AppImage: https://delta.jan.ai/beta/jan-beta-linux-x86_64-{{ VERSION }}.AppImage - - Github Release URL: https://github.com/janhq/jan/releases/tag/v{{ VERSION }} env: DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_JAN_BETA }} \ No newline at end of file diff --git a/.github/workflows/template-build-jan-server.yml b/.github/workflows/template-build-jan-server.yml deleted file mode 100644 index 9bb772605..000000000 --- a/.github/workflows/template-build-jan-server.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: build-jan-server -on: - workflow_call: - inputs: - dockerfile_path: - required: false - type: string - default: './Dockerfile' - docker_image_tag: - required: true - type: string - default: 'ghcr.io/janhq/jan-server:dev-latest' - -jobs: - build: - runs-on: ubuntu-latest - env: - REGISTRY: ghcr.io - IMAGE_NAME: janhq/jan-server - permissions: - packages: write - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Log in to the Container registry - uses: docker/login-action@v3 - 
with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Build and push Docker image - uses: docker/build-push-action@v3 - with: - context: . - file: ${{ inputs.dockerfile_path }} - push: true - tags: ${{ inputs.docker_image_tag }} \ No newline at end of file diff --git a/.github/workflows/template-build-linux-x64.yml b/.github/workflows/template-build-linux-x64.yml index 85b050e62..68079d0dc 100644 --- a/.github/workflows/template-build-linux-x64.yml +++ b/.github/workflows/template-build-linux-x64.yml @@ -83,7 +83,7 @@ jobs: cat ./electron/package.json echo "------------------------" cat ./package.json - jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json + jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json mv /tmp/package.json electron/package.json cat electron/package.json diff --git a/.github/workflows/template-build-macos.yml b/.github/workflows/template-build-macos.yml index 2eabd9ce2..6bc38f786 100644 --- a/.github/workflows/template-build-macos.yml +++ b/.github/workflows/template-build-macos.yml @@ -99,7 +99,7 @@ jobs: cat ./electron/package.json echo "------------------------" cat ./package.json - jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json + jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json mv /tmp/package.json electron/package.json cat electron/package.json diff --git a/.github/workflows/template-build-windows-x64.yml b/.github/workflows/template-build-windows-x64.yml index a317b4960..9e3171e03 100644 --- a/.github/workflows/template-build-windows-x64.yml +++ b/.github/workflows/template-build-windows-x64.yml @@ -108,7 +108,7 @@ jobs: cat ./package.json echo "------------------------" cat ./electron/scripts/uninstaller.nsh - jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json + jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ 
secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json mv /tmp/package.json electron/package.json cat electron/package.json diff --git a/core/src/browser/extension.ts b/core/src/browser/extension.ts index 1d641980b..d768473c9 100644 --- a/core/src/browser/extension.ts +++ b/core/src/browser/extension.ts @@ -12,6 +12,7 @@ export enum ExtensionTypeEnum { SystemMonitoring = 'systemMonitoring', HuggingFace = 'huggingFace', Engine = 'engine', + Hardware = 'hardware', } export interface ExtensionType { diff --git a/core/src/browser/extensions/engines/OAIEngine.test.ts b/core/src/browser/extensions/engines/OAIEngine.test.ts index 81348786c..66537d0be 100644 --- a/core/src/browser/extensions/engines/OAIEngine.test.ts +++ b/core/src/browser/extensions/engines/OAIEngine.test.ts @@ -38,8 +38,14 @@ describe('OAIEngine', () => { it('should subscribe to events on load', () => { engine.onLoad() - expect(events.on).toHaveBeenCalledWith(MessageEvent.OnMessageSent, expect.any(Function)) - expect(events.on).toHaveBeenCalledWith(InferenceEvent.OnInferenceStopped, expect.any(Function)) + expect(events.on).toHaveBeenCalledWith( + MessageEvent.OnMessageSent, + expect.any(Function) + ) + expect(events.on).toHaveBeenCalledWith( + InferenceEvent.OnInferenceStopped, + expect.any(Function) + ) }) it('should handle inference request', async () => { @@ -77,7 +83,12 @@ describe('OAIEngine', () => { expect(events.emit).toHaveBeenCalledWith( MessageEvent.OnMessageUpdate, expect.objectContaining({ - content: [{ type: ContentType.Text, text: { value: 'test response', annotations: [] } }], + content: [ + { + type: ContentType.Text, + text: { value: 'test response', annotations: [] }, + }, + ], status: MessageStatus.Ready, }) ) @@ -101,11 +112,10 @@ describe('OAIEngine', () => { await engine.inference(data) - expect(events.emit).toHaveBeenCalledWith( + expect(events.emit).toHaveBeenLastCalledWith( MessageEvent.OnMessageUpdate, expect.objectContaining({ - content: [{ type: ContentType.Text, text: { value: 'test error', annotations: [] } }], - status: MessageStatus.Error, + status: 'error', error_code: 500, }) ) diff --git a/core/src/browser/extensions/engines/OAIEngine.ts b/core/src/browser/extensions/engines/OAIEngine.ts index 6b4c20a19..61032357c 100644 --- a/core/src/browser/extensions/engines/OAIEngine.ts +++ b/core/src/browser/extensions/engines/OAIEngine.ts @@ -42,7 +42,9 @@ export abstract class OAIEngine extends AIEngine { */ override onLoad() { super.onLoad() - events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => this.inference(data)) + events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => + this.inference(data) + ) events.on(InferenceEvent.OnInferenceStopped, () => this.stopInference()) } @@ -128,7 +130,9 @@ export abstract class OAIEngine extends AIEngine { events.emit(MessageEvent.OnMessageUpdate, message) }, complete: async () => { - message.status = message.content.length ? MessageStatus.Ready : MessageStatus.Error + message.status = message.content.length + ? MessageStatus.Ready + : MessageStatus.Error events.emit(MessageEvent.OnMessageUpdate, message) }, error: async (err: any) => { @@ -141,7 +145,10 @@ export abstract class OAIEngine extends AIEngine { message.content[0] = { type: ContentType.Text, text: { - value: err.message, + value: + typeof message === 'string' + ? err.message + : (JSON.stringify(err.message) ?? 
err.detail), annotations: [], }, } diff --git a/core/src/browser/extensions/engines/helpers/sse.test.ts b/core/src/browser/extensions/engines/helpers/sse.test.ts index 0b78aa9b5..f8c2ac6b4 100644 --- a/core/src/browser/extensions/engines/helpers/sse.test.ts +++ b/core/src/browser/extensions/engines/helpers/sse.test.ts @@ -1,14 +1,17 @@ import { lastValueFrom, Observable } from 'rxjs' import { requestInference } from './sse' -import { ReadableStream } from 'stream/web'; +import { ReadableStream } from 'stream/web' describe('requestInference', () => { it('should send a request to the inference server and return an Observable', () => { // Mock the fetch function const mockFetch: any = jest.fn(() => Promise.resolve({ ok: true, - json: () => Promise.resolve({ choices: [{ message: { content: 'Generated response' } }] }), + json: () => + Promise.resolve({ + choices: [{ message: { content: 'Generated response' } }], + }), headers: new Headers(), redirected: false, status: 200, @@ -36,7 +39,10 @@ describe('requestInference', () => { const mockFetch: any = jest.fn(() => Promise.resolve({ ok: false, - json: () => Promise.resolve({ error: { message: 'Wrong API Key', code: 'invalid_api_key' } }), + json: () => + Promise.resolve({ + error: { message: 'Invalid API Key.', code: 'invalid_api_key' }, + }), headers: new Headers(), redirected: false, status: 401, @@ -56,69 +62,85 @@ describe('requestInference', () => { // Assert the expected behavior expect(result).toBeInstanceOf(Observable) - expect(lastValueFrom(result)).rejects.toEqual({ message: 'Wrong API Key', code: 'invalid_api_key' }) + expect(lastValueFrom(result)).rejects.toEqual({ + message: 'Invalid API Key.', + code: 'invalid_api_key', + }) }) }) - it('should handle a successful response with a transformResponse function', () => { - // Mock the fetch function - const mockFetch: any = jest.fn(() => - Promise.resolve({ - ok: true, - json: () => Promise.resolve({ choices: [{ message: { content: 'Generated response' } }] }), - headers: new Headers(), - redirected: false, - status: 200, - statusText: 'OK', - }) - ) - jest.spyOn(global, 'fetch').mockImplementation(mockFetch) - - // Define the test inputs - const inferenceUrl = 'https://inference-server.com' - const requestBody = { message: 'Hello' } - const model = { id: 'model-id', parameters: { stream: false } } - const transformResponse = (data: any) => data.choices[0].message.content.toUpperCase() - - // Call the function - const result = requestInference(inferenceUrl, requestBody, model, undefined, undefined, transformResponse) - - // Assert the expected behavior - expect(result).toBeInstanceOf(Observable) - expect(lastValueFrom(result)).resolves.toEqual('GENERATED RESPONSE') - }) - - - it('should handle a successful response with streaming enabled', () => { - // Mock the fetch function - const mockFetch: any = jest.fn(() => - Promise.resolve({ - ok: true, - body: new ReadableStream({ - start(controller) { - controller.enqueue(new TextEncoder().encode('data: {"choices": [{"delta": {"content": "Streamed"}}]}')); - controller.enqueue(new TextEncoder().encode('data: [DONE]')); - controller.close(); - } +it('should handle a successful response with a transformResponse function', () => { + // Mock the fetch function + const mockFetch: any = jest.fn(() => + Promise.resolve({ + ok: true, + json: () => + Promise.resolve({ + choices: [{ message: { content: 'Generated response' } }], }), - headers: new Headers(), - redirected: false, - status: 200, - statusText: 'OK', - }) - ); - jest.spyOn(global, 
'fetch').mockImplementation(mockFetch); - - // Define the test inputs - const inferenceUrl = 'https://inference-server.com'; - const requestBody = { message: 'Hello' }; - const model = { id: 'model-id', parameters: { stream: true } }; - - // Call the function - const result = requestInference(inferenceUrl, requestBody, model); - - // Assert the expected behavior - expect(result).toBeInstanceOf(Observable); - expect(lastValueFrom(result)).resolves.toEqual('Streamed'); - }); + headers: new Headers(), + redirected: false, + status: 200, + statusText: 'OK', + }) + ) + jest.spyOn(global, 'fetch').mockImplementation(mockFetch) + // Define the test inputs + const inferenceUrl = 'https://inference-server.com' + const requestBody = { message: 'Hello' } + const model = { id: 'model-id', parameters: { stream: false } } + const transformResponse = (data: any) => + data.choices[0].message.content.toUpperCase() + + // Call the function + const result = requestInference( + inferenceUrl, + requestBody, + model, + undefined, + undefined, + transformResponse + ) + + // Assert the expected behavior + expect(result).toBeInstanceOf(Observable) + expect(lastValueFrom(result)).resolves.toEqual('GENERATED RESPONSE') +}) + +it('should handle a successful response with streaming enabled', () => { + // Mock the fetch function + const mockFetch: any = jest.fn(() => + Promise.resolve({ + ok: true, + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode( + 'data: {"choices": [{"delta": {"content": "Streamed"}}]}' + ) + ) + controller.enqueue(new TextEncoder().encode('data: [DONE]')) + controller.close() + }, + }), + headers: new Headers(), + redirected: false, + status: 200, + statusText: 'OK', + }) + ) + jest.spyOn(global, 'fetch').mockImplementation(mockFetch) + + // Define the test inputs + const inferenceUrl = 'https://inference-server.com' + const requestBody = { message: 'Hello' } + const model = { id: 'model-id', parameters: { stream: true } } + + // Call the function + const result = requestInference(inferenceUrl, requestBody, model) + + // Assert the expected behavior + expect(result).toBeInstanceOf(Observable) + expect(lastValueFrom(result)).resolves.toEqual('Streamed') +}) diff --git a/core/src/browser/extensions/engines/helpers/sse.ts b/core/src/browser/extensions/engines/helpers/sse.ts index 55cde56b4..5c63008ff 100644 --- a/core/src/browser/extensions/engines/helpers/sse.ts +++ b/core/src/browser/extensions/engines/helpers/sse.ts @@ -32,20 +32,19 @@ export function requestInference( }) .then(async (response) => { if (!response.ok) { - const data = await response.json() - let errorCode = ErrorCode.Unknown - if (data.error) { - errorCode = data.error.code ?? data.error.type ?? ErrorCode.Unknown - } else if (response.status === 401) { - errorCode = ErrorCode.InvalidApiKey + if (response.status === 401) { + throw { + code: ErrorCode.InvalidApiKey, + message: 'Invalid API Key.', + } } - const error = { - message: data.error?.message ?? data.message ?? 'Error occurred.', - code: errorCode, + let data = await response.json() + try { + handleError(data) + } catch (err) { + subscriber.error(err) + return } - subscriber.error(error) - subscriber.complete() - return } // There could be overriden stream parameter in the model // that is set in request body (transformed payload) @@ -54,9 +53,10 @@ export function requestInference( model.parameters?.stream === false ) { const data = await response.json() - if (data.error || data.message) { - subscriber.error(data.error ?? 
data) - subscriber.complete() + try { + handleError(data) + } catch (err) { + subscriber.error(err) return } if (transformResponse) { @@ -91,13 +91,10 @@ export function requestInference( const toParse = cachedLines + line if (!line.includes('data: [DONE]')) { const data = JSON.parse(toParse.replace('data: ', '')) - if ( - 'error' in data || - 'message' in data || - 'detail' in data - ) { - subscriber.error(data.error ?? data) - subscriber.complete() + try { + handleError(data) + } catch (err) { + subscriber.error(err) return } content += data.choices[0]?.delta?.content ?? '' @@ -118,3 +115,18 @@ .catch((err) => subscriber.error(err)) }) } + +/** + * Handle error and normalize it to a common format. + * @param data + */ +const handleError = (data: any) => { + if ( + data.error || + data.message || + data.detail || + (Array.isArray(data) && data.length && data[0].error) + ) { + throw data.error ?? data[0]?.error ?? data + } +} diff --git a/core/src/browser/extensions/hardwareManagement.ts b/core/src/browser/extensions/hardwareManagement.ts new file mode 100644 index 000000000..1f7c36287 --- /dev/null +++ b/core/src/browser/extensions/hardwareManagement.ts @@ -0,0 +1,26 @@ +import { HardwareInformation } from '../../types' +import { BaseExtension, ExtensionTypeEnum } from '../extension' + +/** + * Hardware management extension. Retrieves hardware information and manages GPU activation. + * @abstract + * @extends BaseExtension + */ +export abstract class HardwareManagementExtension extends BaseExtension { + type(): ExtensionTypeEnum | undefined { + return ExtensionTypeEnum.Hardware + } + + /** + * @returns A Promise that resolves to the system hardware information. + */ + abstract getHardware(): Promise<HardwareInformation> + + /** + * @returns A Promise that resolves to the result of activating the selected GPUs. + */ + abstract setActiveGpu(data: { gpus: number[] }): Promise<{ + message: string + activated_gpus: number[] + }> +} diff --git a/core/src/browser/extensions/index.test.ts b/core/src/browser/extensions/index.test.ts index 26cbda8c5..bc5a7c358 100644 --- a/core/src/browser/extensions/index.test.ts +++ b/core/src/browser/extensions/index.test.ts @@ -1,6 +1,5 @@ import { ConversationalExtension } from './index'; import { InferenceExtension } from './index'; -import { MonitoringExtension } from './index'; import { AssistantExtension } from './index'; import { ModelExtension } from './index'; import * as Engines from './index'; @@ -14,10 +13,6 @@ describe('index.ts exports', () => { expect(InferenceExtension).toBeDefined(); }); - test('should export MonitoringExtension', () => { - expect(MonitoringExtension).toBeDefined(); - }); - test('should export AssistantExtension', () => { expect(AssistantExtension).toBeDefined(); }); @@ -29,4 +24,4 @@ describe('index.ts exports', () => { test('should export Engines', () => { expect(Engines).toBeDefined(); }); -}); \ No newline at end of file +}); diff --git a/core/src/browser/extensions/index.ts b/core/src/browser/extensions/index.ts index 9dbfe1afe..f11c7b09f 100644 --- a/core/src/browser/extensions/index.ts +++ b/core/src/browser/extensions/index.ts @@ -9,10 +9,7 @@ export { ConversationalExtension } from './conversational' */ export { InferenceExtension } from './inference' -/** - * Monitoring extension for system monitoring. - */ -export { MonitoringExtension } from './monitoring' + /** * Assistant extension for managing assistants. 
@@ -33,3 +30,8 @@ export * from './engines' * Engines Management */ export * from './enginesManagement' + +/** + * Hardware Management + */ +export * from './hardwareManagement' diff --git a/core/src/browser/extensions/monitoring.test.ts b/core/src/browser/extensions/monitoring.test.ts deleted file mode 100644 index 9bba89a8c..000000000 --- a/core/src/browser/extensions/monitoring.test.ts +++ /dev/null @@ -1,42 +0,0 @@ - -import { ExtensionTypeEnum } from '../extension'; -import { MonitoringExtension } from './monitoring'; - -it('should have the correct type', () => { - class TestMonitoringExtension extends MonitoringExtension { - getGpuSetting(): Promise { - throw new Error('Method not implemented.'); - } - getResourcesInfo(): Promise { - throw new Error('Method not implemented.'); - } - getCurrentLoad(): Promise { - throw new Error('Method not implemented.'); - } - getOsInfo(): Promise { - throw new Error('Method not implemented.'); - } - } - const monitoringExtension = new TestMonitoringExtension(); - expect(monitoringExtension.type()).toBe(ExtensionTypeEnum.SystemMonitoring); -}); - - -it('should create an instance of MonitoringExtension', () => { - class TestMonitoringExtension extends MonitoringExtension { - getGpuSetting(): Promise { - throw new Error('Method not implemented.'); - } - getResourcesInfo(): Promise { - throw new Error('Method not implemented.'); - } - getCurrentLoad(): Promise { - throw new Error('Method not implemented.'); - } - getOsInfo(): Promise { - throw new Error('Method not implemented.'); - } - } - const monitoringExtension = new TestMonitoringExtension(); - expect(monitoringExtension).toBeInstanceOf(MonitoringExtension); -}); diff --git a/core/src/browser/extensions/monitoring.ts b/core/src/browser/extensions/monitoring.ts deleted file mode 100644 index cb544b6b7..000000000 --- a/core/src/browser/extensions/monitoring.ts +++ /dev/null @@ -1,20 +0,0 @@ -import { BaseExtension, ExtensionTypeEnum } from '../extension' -import { GpuSetting, MonitoringInterface, OperatingSystemInfo } from '../../types' - -/** - * Monitoring extension for system monitoring. - * @extends BaseExtension - */ -export abstract class MonitoringExtension extends BaseExtension implements MonitoringInterface { - /** - * Monitoring extension type. 
- */ - type(): ExtensionTypeEnum | undefined { - return ExtensionTypeEnum.SystemMonitoring - } - - abstract getGpuSetting(): Promise - abstract getResourcesInfo(): Promise - abstract getCurrentLoad(): Promise - abstract getOsInfo(): Promise -} diff --git a/core/src/types/config/appConfigEntity.ts b/core/src/types/config/appConfigEntity.ts index 1402aeca1..bd352d22f 100644 --- a/core/src/types/config/appConfigEntity.ts +++ b/core/src/types/config/appConfigEntity.ts @@ -1,4 +1,5 @@ export type AppConfiguration = { data_folder: string quick_ask: boolean + distinct_id?: string } diff --git a/core/src/types/engine/index.ts b/core/src/types/engine/index.ts index 7c848a279..9a6beeeff 100644 --- a/core/src/types/engine/index.ts +++ b/core/src/types/engine/index.ts @@ -18,6 +18,7 @@ export type EngineMetadata = { template?: string } } + explore_models_url?: string } export type EngineVariant = { diff --git a/core/src/types/hardware/index.ts b/core/src/types/hardware/index.ts new file mode 100644 index 000000000..d154a4417 --- /dev/null +++ b/core/src/types/hardware/index.ts @@ -0,0 +1,55 @@ +export type Cpu = { + arch: string + cores: number + instructions: string[] + model: string + usage: number +} + +export type GpuAdditionalInformation = { + compute_cap: string + driver_version: string +} + +export type Gpu = { + activated: boolean + additional_information?: GpuAdditionalInformation + free_vram: number + id: string + name: string + total_vram: number + uuid: string + version: string +} + +export type Os = { + name: string + version: string +} + +export type Power = { + battery_life: number + charging_status: string + is_power_saving: boolean +} + +export type Ram = { + available: number + total: number + type: string +} + +export type Storage = { + available: number + total: number + type: string +} + +export type HardwareInformation = { + cpu: Cpu + gpus: Gpu[] + os: Os + power: Power + ram: Ram + storage: Storage +} diff --git a/core/src/types/index.test.ts b/core/src/types/index.test.ts index 9dc001c4d..d938feee9 100644 --- a/core/src/types/index.test.ts +++ b/core/src/types/index.test.ts @@ -4,7 +4,6 @@ import * as model from './model'; import * as thread from './thread'; import * as message from './message'; import * as inference from './inference'; -import * as monitoring from './monitoring'; import * as file from './file'; import * as config from './config'; import * as huggingface from './huggingface'; @@ -18,7 +17,6 @@ import * as setting from './setting'; expect(thread).toBeDefined(); expect(message).toBeDefined(); expect(inference).toBeDefined(); - expect(monitoring).toBeDefined(); expect(file).toBeDefined(); expect(config).toBeDefined(); expect(huggingface).toBeDefined(); diff --git a/core/src/types/index.ts b/core/src/types/index.ts index e30dd18c3..3d262a6b7 100644 --- a/core/src/types/index.ts +++ b/core/src/types/index.ts @@ -3,7 +3,6 @@ export * from './model' export * from './thread' export * from './message' export * from './inference' -export * from './monitoring' export * from './file' export * from './config' export * from './huggingface' @@ -11,3 +10,4 @@ export * from './miscellaneous' export * from './api' export * from './setting' export * from './engine' +export * from './hardware' diff --git a/core/src/types/miscellaneous/systemResourceInfo.ts b/core/src/types/miscellaneous/systemResourceInfo.ts index 82db5d941..9361b79b6 100644 --- a/core/src/types/miscellaneous/systemResourceInfo.ts +++ b/core/src/types/miscellaneous/systemResourceInfo.ts @@ -1,33 +1,25 @@ +import 
{ GpuAdditionalInformation } from '../hardware' + export type SystemResourceInfo = { memAvailable: number } -export type RunMode = 'cpu' | 'gpu' - export type GpuSetting = { - notify: boolean - run_mode: RunMode - nvidia_driver: { - exist: boolean - version: string - } - cuda: { - exist: boolean - version: string - } gpus: GpuSettingInfo[] - gpu_highest_vram: string - gpus_in_use: string[] - is_initial: boolean // TODO: This needs to be set based on user toggle in settings vulkan: boolean + cpu?: any } export type GpuSettingInfo = { + activated: boolean + free_vram: number id: string - vram: string name: string - arch?: string + total_vram: number + uuid: string + version: string + additional_information?: GpuAdditionalInformation } export type SystemInformation = { @@ -42,9 +34,6 @@ export type SupportedPlatform = SupportedPlatformTuple[number] export type OperatingSystemInfo = { platform: SupportedPlatform | 'unknown' arch: string - release: string - machine: string - version: string totalMem: number freeMem: number } diff --git a/core/src/types/model/modelEntity.ts b/core/src/types/model/modelEntity.ts index 482dfa1ac..6e47c9ae4 100644 --- a/core/src/types/model/modelEntity.ts +++ b/core/src/types/model/modelEntity.ts @@ -71,7 +71,7 @@ export type Model = { /** * The model identifier, modern version of id. */ - mode?: string + model?: string /** * Human-readable name that is used for UI. @@ -150,6 +150,7 @@ export type ModelSettingParams = { */ export type ModelRuntimeParams = { temperature?: number + max_temperature?: number token_limit?: number top_k?: number top_p?: number diff --git a/core/src/types/monitoring/index.test.ts b/core/src/types/monitoring/index.test.ts deleted file mode 100644 index 56c5879e4..000000000 --- a/core/src/types/monitoring/index.test.ts +++ /dev/null @@ -1,13 +0,0 @@ -import * as monitoringInterface from './monitoringInterface' -import * as resourceInfo from './resourceInfo' - -import * as index from './index' - -it('should re-export all symbols from monitoringInterface and resourceInfo', () => { - for (const key in monitoringInterface) { - expect(index[key]).toBe(monitoringInterface[key]) - } - for (const key in resourceInfo) { - expect(index[key]).toBe(resourceInfo[key]) - } -}) diff --git a/core/src/types/monitoring/index.ts b/core/src/types/monitoring/index.ts deleted file mode 100644 index b96c518fd..000000000 --- a/core/src/types/monitoring/index.ts +++ /dev/null @@ -1,2 +0,0 @@ -export * from './monitoringInterface' -export * from './resourceInfo' diff --git a/core/src/types/monitoring/monitoringInterface.ts b/core/src/types/monitoring/monitoringInterface.ts deleted file mode 100644 index 5ab1394a1..000000000 --- a/core/src/types/monitoring/monitoringInterface.ts +++ /dev/null @@ -1,29 +0,0 @@ -import { GpuSetting, OperatingSystemInfo } from '../miscellaneous' - -/** - * Monitoring extension for system monitoring. - * @extends BaseExtension - */ -export interface MonitoringInterface { - /** - * Returns information about the system resources. - * @returns {Promise} A promise that resolves with the system resources information. - */ - getResourcesInfo(): Promise - - /** - * Returns the current system load. - * @returns {Promise} A promise that resolves with the current system load. - */ - getCurrentLoad(): Promise - - /** - * Returns the GPU configuration. - */ - getGpuSetting(): Promise - - /** - * Returns information about the operating system. 
- */ - getOsInfo(): Promise -} diff --git a/core/src/types/monitoring/resourceInfo.ts b/core/src/types/monitoring/resourceInfo.ts deleted file mode 100644 index b19da5462..000000000 --- a/core/src/types/monitoring/resourceInfo.ts +++ /dev/null @@ -1,6 +0,0 @@ -export type ResourceInfo = { - mem: { - totalMemory: number - usedMemory: number - } -} diff --git a/docs/src/pages/docs/configure-extensions.mdx b/docs/src/pages/docs/configure-extensions.mdx index c72d8c6a5..71d226554 100644 --- a/docs/src/pages/docs/configure-extensions.mdx +++ b/docs/src/pages/docs/configure-extensions.mdx @@ -55,7 +55,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior "productName": "Conversational", "version": "1.0.0", "main": "dist/index.js", - "description": "This extension enables conversations and state persistence via your filesystem", + "description": "This extension enables conversations and state persistence via your filesystem.", "url": "extension://@janhq/conversational-extension/dist/index.js" }, "@janhq/inference-anthropic-extension": { @@ -70,7 +70,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior "productName": "Anthropic Inference Engine", "version": "1.0.2", "main": "dist/index.js", - "description": "This extension enables Anthropic chat completion API calls", + "description": "This extension enables Anthropic chat completion API calls.", "url": "extension://@janhq/inference-anthropic-extension/dist/index.js" }, "@janhq/inference-triton-trt-llm-extension": { @@ -85,7 +85,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior "productName": "Triton-TRT-LLM Inference Engine", "version": "1.0.0", "main": "dist/index.js", - "description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option", + "description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option.", "url": "extension://@janhq/inference-triton-trt-llm-extension/dist/index.js" }, "@janhq/inference-mistral-extension": { @@ -100,7 +100,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior "productName": "MistralAI Inference Engine", "version": "1.0.1", "main": "dist/index.js", - "description": "This extension enables Mistral chat completion API calls", + "description": "This extension enables Mistral chat completion API calls.", "url": "extension://@janhq/inference-mistral-extension/dist/index.js" }, "@janhq/inference-martian-extension": { @@ -115,7 +115,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior "productName": "Martian Inference Engine", "version": "1.0.1", "main": "dist/index.js", - "description": "This extension enables Martian chat completion API calls", + "description": "This extension enables Martian chat completion API calls.", "url": "extension://@janhq/inference-martian-extension/dist/index.js" }, "@janhq/inference-openrouter-extension": { @@ -130,7 +130,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior "productName": "OpenRouter Inference Engine", "version": "1.0.0", "main": "dist/index.js", - "description": "This extension enables Open Router chat completion API calls", + "description": "This extension enables Open Router chat completion API calls.", "url": "extension://@janhq/inference-openrouter-extension/dist/index.js" }, "@janhq/inference-nvidia-extension": { @@ -145,7 +145,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior 
"productName": "NVIDIA NIM Inference Engine", "version": "1.0.1", "main": "dist/index.js", - "description": "This extension enables NVIDIA chat completion API calls", + "description": "This extension enables NVIDIA chat completion API calls.", "url": "extension://@janhq/inference-nvidia-extension/dist/index.js" }, "@janhq/inference-groq-extension": { @@ -160,7 +160,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior "productName": "Groq Inference Engine", "version": "1.0.1", "main": "dist/index.js", - "description": "This extension enables fast Groq chat completion API calls", + "description": "This extension enables fast Groq chat completion API calls.", "url": "extension://@janhq/inference-groq-extension/dist/index.js" }, "@janhq/inference-openai-extension": { @@ -175,7 +175,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior "productName": "OpenAI Inference Engine", "version": "1.0.2", "main": "dist/index.js", - "description": "This extension enables OpenAI chat completion API calls", + "description": "This extension enables OpenAI chat completion API calls.", "url": "extension://@janhq/inference-openai-extension/dist/index.js" }, "@janhq/inference-cohere-extension": { @@ -190,7 +190,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior "productName": "Cohere Inference Engine", "version": "1.0.0", "main": "dist/index.js", - "description": "This extension enables Cohere chat completion API calls", + "description": "This extension enables Cohere chat completion API calls.", "url": "extension://@janhq/inference-cohere-extension/dist/index.js" }, "@janhq/model-extension": { @@ -205,7 +205,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior "productName": "Model Management", "version": "1.0.33", "main": "dist/index.js", - "description": "Model Management Extension provides model exploration and seamless downloads", + "description": "Model Management Extension provides model exploration and seamless downloads.", "url": "extension://@janhq/model-extension/dist/index.js" }, "@janhq/monitoring-extension": { @@ -220,7 +220,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior "productName": "System Monitoring", "version": "1.0.10", "main": "dist/index.js", - "description": "This extension provides system health and OS level data", + "description": "This extension provides system health and OS level data.", "url": "extension://@janhq/monitoring-extension/dist/index.js" }, "@janhq/assistant-extension": { @@ -235,7 +235,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior "productName": "Jan Assistant", "version": "1.0.1", "main": "dist/index.js", - "description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models", + "description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models.", "url": "extension://@janhq/assistant-extension/dist/index.js" }, "@janhq/tensorrt-llm-extension": { diff --git a/docs/src/pages/docs/install-engines.mdx b/docs/src/pages/docs/install-engines.mdx index 4e99d337d..cded71c83 100644 --- a/docs/src/pages/docs/install-engines.mdx +++ b/docs/src/pages/docs/install-engines.mdx @@ -47,8 +47,8 @@ To add a new remote engine: |-------|-------------|----------| | Engine Name | Name for your engine (e.g., "OpenAI", "Claude") | ✓ | | API URL | The base URL of the provider's API 
| ✓ | -| API Key | Your authentication key from the provider | ✓ | -| Model List URL | URL for fetching available models | | +| API Key | Your authentication key to activate this engine | ✓ | +| Model List URL | The endpoint URL to fetch available models | | API Key Template | Custom authorization header format | | | Request Format Conversion | Function to convert Jan's request format to provider's format | | | Response Format Conversion | Function to convert provider's response format to Jan's format | | diff --git a/electron/main.ts b/electron/main.ts index 6ce7f476a..42d16bb74 100644 --- a/electron/main.ts +++ b/electron/main.ts @@ -28,6 +28,7 @@ import { setupReactDevTool } from './utils/dev' import { trayManager } from './managers/tray' import { logSystemInfo } from './utils/system' import { registerGlobalShortcuts } from './utils/shortcut' +import { registerLogger } from './utils/logger' const preloadPath = join(__dirname, 'preload.js') const rendererPath = join(__dirname, '..', 'renderer') @@ -79,6 +80,7 @@ app }) .then(setupCore) .then(createUserSpace) + .then(registerLogger) .then(migrate) .then(setupExtensions) .then(setupMenu) diff --git a/electron/package.json b/electron/package.json index 700f009a5..59761623c 100644 --- a/electron/package.json +++ b/electron/package.json @@ -1,6 +1,6 @@ { "name": "jan", - "version": "0.1.4", + "version": "0.1.1737985524", "main": "./build/main.js", "author": "Jan ", "license": "MIT", diff --git a/extensions/monitoring-extension/src/node/logger.ts b/electron/utils/logger.ts similarity index 77% rename from extensions/monitoring-extension/src/node/logger.ts rename to electron/utils/logger.ts index ca64ea2d9..48af0b93a 100644 --- a/extensions/monitoring-extension/src/node/logger.ts +++ b/electron/utils/logger.ts @@ -1,16 +1,28 @@ -import fs from 'fs' +import { + createWriteStream, + existsSync, + mkdirSync, + readdir, + stat, + unlink, + writeFileSync, +} from 'fs' import util from 'util' import { getAppConfigurations, getJanDataFolderPath, Logger, + LoggerManager, } from '@janhq/core/node' import path, { join } from 'path' -export class FileLogger extends Logger { +/** + * File Logger + */ +export class FileLogger implements Logger { name = 'file' logCleaningInterval: number = 120000 - timeout: NodeJS.Timeout | null = null + timeout: NodeJS.Timeout | undefined appLogPath: string = './' logEnabled: boolean = true @@ -18,14 +30,13 @@ export class FileLogger extends Logger { logEnabled: boolean = true, logCleaningInterval: number = 120000 ) { - super() this.logEnabled = logEnabled if (logCleaningInterval) this.logCleaningInterval = logCleaningInterval const appConfigurations = getAppConfigurations() const logFolderPath = join(appConfigurations.data_folder, 'logs') - if (!fs.existsSync(logFolderPath)) { - fs.mkdirSync(logFolderPath, { recursive: true }) + if (!existsSync(logFolderPath)) { + mkdirSync(logFolderPath, { recursive: true }) } this.appLogPath = join(logFolderPath, 'app.log') @@ -69,8 +80,8 @@ export class FileLogger extends Logger { const logDirectory = path.join(getJanDataFolderPath(), 'logs') // Perform log cleaning const currentDate = new Date() - if (fs.existsSync(logDirectory)) - fs.readdir(logDirectory, (err, files) => { + if (existsSync(logDirectory)) + readdir(logDirectory, (err, files) => { if (err) { console.error('Error reading log directory:', err) return @@ -78,7 +89,7 @@ export class FileLogger extends Logger { files.forEach((file) => { const filePath = path.join(logDirectory, file) - fs.stat(filePath, (err, stats) => { + 
stat(filePath, (err, stats) => { if (err) { console.error('Error getting file stats:', err) return @@ -86,7 +97,7 @@ export class FileLogger extends Logger { // Check size if (stats.size > size) { - fs.unlink(filePath, (err) => { + unlink(filePath, (err) => { if (err) { console.error('Error deleting log file:', err) return @@ -103,7 +114,7 @@ export class FileLogger extends Logger { (1000 * 3600 * 24) ) if (daysDifference > days) { - fs.unlink(filePath, (err) => { + unlink(filePath, (err) => { if (err) { console.error('Error deleting log file:', err) return @@ -124,15 +135,20 @@ export class FileLogger extends Logger { } } +/** + * Write log function implementation + * @param message + * @param logPath + */ const writeLog = (message: string, logPath: string) => { - if (!fs.existsSync(logPath)) { + if (!existsSync(logPath)) { const logDirectory = path.join(getJanDataFolderPath(), 'logs') - if (!fs.existsSync(logDirectory)) { - fs.mkdirSync(logDirectory) + if (!existsSync(logDirectory)) { + mkdirSync(logDirectory) } - fs.writeFileSync(logPath, message) + writeFileSync(logPath, message) } else { - const logFile = fs.createWriteStream(logPath, { + const logFile = createWriteStream(logPath, { flags: 'a', }) logFile.write(util.format(message) + '\n') @@ -140,3 +156,12 @@ const writeLog = (message: string, logPath: string) => { console.debug(message) } } + +/** + * Register logger for global application logging + */ +export const registerLogger = () => { + const logger = new FileLogger() + LoggerManager.instance().register(logger) + logger.cleanLogs() +} diff --git a/extensions/engine-management-extension/engines.mjs b/extensions/engine-management-extension/engines.mjs index e85035423..eafe8a09c 100644 --- a/extensions/engine-management-extension/engines.mjs +++ b/extensions/engine-management-extension/engines.mjs @@ -6,6 +6,8 @@ import groq from './resources/groq.json' with { type: 'json' } import martian from './resources/martian.json' with { type: 'json' } import mistral from './resources/mistral.json' with { type: 'json' } import nvidia from './resources/nvidia.json' with { type: 'json' } +import deepseek from './resources/deepseek.json' with { type: 'json' } +import googleGemini from './resources/google_gemini.json' with { type: 'json' } import anthropicModels from './models/anthropic.json' with { type: 'json' } import cohereModels from './models/cohere.json' with { type: 'json' } @@ -15,6 +17,8 @@ import groqModels from './models/groq.json' with { type: 'json' } import martianModels from './models/martian.json' with { type: 'json' } import mistralModels from './models/mistral.json' with { type: 'json' } import nvidiaModels from './models/nvidia.json' with { type: 'json' } +import deepseekModels from './models/deepseek.json' with { type: 'json' } +import googleGeminiModels from './models/google_gemini.json' with { type: 'json' } const engines = [ anthropic, @@ -25,6 +29,8 @@ const engines = [ mistral, martian, nvidia, + deepseek, + googleGemini, ] const models = [ ...anthropicModels, @@ -35,5 +41,7 @@ const models = [ ...mistralModels, ...martianModels, ...nvidiaModels, + ...deepseekModels, + ...googleGeminiModels, ] export { engines, models } diff --git a/extensions/engine-management-extension/models/anthropic.json b/extensions/engine-management-extension/models/anthropic.json index 46b5893d1..0212ce1da 100644 --- a/extensions/engine-management-extension/models/anthropic.json +++ b/extensions/engine-management-extension/models/anthropic.json @@ -8,6 +8,7 @@ "inference_params": { 
"max_tokens": 4096, "temperature": 0.7, + "max_temperature": 1.0, "stream": true }, "engine": "anthropic" @@ -21,6 +22,7 @@ "inference_params": { "max_tokens": 8192, "temperature": 0.7, + "max_temperature": 1.0, "stream": true }, "engine": "anthropic" @@ -34,6 +36,7 @@ "inference_params": { "max_tokens": 8192, "temperature": 0.7, + "max_temperature": 1.0, "stream": true }, "engine": "anthropic" diff --git a/extensions/engine-management-extension/models/cohere.json b/extensions/engine-management-extension/models/cohere.json index 458e4278b..96a830637 100644 --- a/extensions/engine-management-extension/models/cohere.json +++ b/extensions/engine-management-extension/models/cohere.json @@ -8,6 +8,7 @@ "inference_params": { "max_tokens": 4096, "temperature": 0.7, + "max_temperature": 1.0, "stream": false }, "engine": "cohere" @@ -21,6 +22,7 @@ "inference_params": { "max_tokens": 4096, "temperature": 0.7, + "max_temperature": 1.0, "stream": false }, "engine": "cohere" diff --git a/extensions/engine-management-extension/models/deepseek.json b/extensions/engine-management-extension/models/deepseek.json new file mode 100644 index 000000000..29d5406bf --- /dev/null +++ b/extensions/engine-management-extension/models/deepseek.json @@ -0,0 +1,28 @@ +[ + { + "model": "deepseek-chat", + "object": "model", + "name": "DeepSeek Chat", + "version": "1.0", + "description": "The deepseek-chat model has been upgraded to DeepSeek-V3. deepseek-reasoner points to the new model DeepSeek-R1", + "inference_params": { + "max_tokens": 8192, + "temperature": 0.6, + "stream": true + }, + "engine": "deepseek" + }, + { + "model": "deepseek-reasoner", + "object": "model", + "name": "DeepSeek R1", + "version": "1.0", + "description": "CoT (Chain of Thought) is the reasoning content deepseek-reasoner gives before output the final answer. 
For details, please refer to Reasoning Model.", + "inference_params": { + "max_tokens": 8192, + "temperature": 0.6, + "stream": true + }, + "engine": "deepseek" + } +] diff --git a/extensions/engine-management-extension/models/google_gemini.json b/extensions/engine-management-extension/models/google_gemini.json new file mode 100644 index 000000000..392754ee6 --- /dev/null +++ b/extensions/engine-management-extension/models/google_gemini.json @@ -0,0 +1,67 @@ +[ + { + "model": "gemini-2.0-flash", + "object": "model", + "name": "Gemini 2.0 Flash", + "version": "1.0", + "description": "Gemini 2.0 Flash delivers next-gen features and improved capabilities, including superior speed, native tool use, multimodal generation, and a 1M token context window.", + "inference_params": { + "max_tokens": 8192, + "temperature": 0.6, + "stream": true + }, + "engine": "google_gemini" + }, + { + "model": "gemini-2.0-flash-lite-preview", + "object": "model", + "name": "Gemini 2.0 Flash-Lite Preview", + "version": "1.0", + "description": "A Gemini 2.0 Flash model optimized for cost efficiency and low latency.", + "inference_params": { + "max_tokens": 8192, + "temperature": 0.6, + "stream": true + }, + "engine": "google_gemini" + }, + { + "model": "gemini-1.5-flash", + "object": "model", + "name": "Gemini 1.5 Flash", + "version": "1.0", + "description": "Gemini 1.5 Flash is a fast and versatile multimodal model for scaling across diverse tasks.", + "inference_params": { + "max_tokens": 8192, + "temperature": 0.6, + "stream": true + }, + "engine": "google_gemini" + }, + { + "model": "gemini-1.5-flash-8b", + "object": "model", + "name": "Gemini 1.5 Flash-8B", + "version": "1.0", + "description": "Gemini 1.5 Flash-8B is a small model designed for lower intelligence tasks.", + "inference_params": { + "max_tokens": 8192, + "temperature": 0.6, + "stream": true + }, + "engine": "google_gemini" + }, + { + "model": "gemini-1.5-pro", + "object": "model", + "name": "Gemini 1.5 Pro", + "version": "1.0", + "description": "Gemini 1.5 Pro is a mid-size multimodal model that is optimized for a wide-range of reasoning tasks. 1.5 Pro can process large amounts of data at once, including 2 hours of video, 19 hours of audio, codebases with 60,000 lines of code, or 2,000 pages of text. 
", + "inference_params": { + "max_tokens": 8192, + "temperature": 0.6, + "stream": true + }, + "engine": "google_gemini" + } +] diff --git a/extensions/engine-management-extension/models/mistral.json b/extensions/engine-management-extension/models/mistral.json index 12fcf938d..47df5d506 100644 --- a/extensions/engine-management-extension/models/mistral.json +++ b/extensions/engine-management-extension/models/mistral.json @@ -8,6 +8,7 @@ "inference_params": { "max_tokens": 32000, "temperature": 0.7, + "max_temperature": 1.0, "top_p": 0.95, "stream": true }, @@ -22,6 +23,7 @@ "inference_params": { "max_tokens": 32000, "temperature": 0.7, + "max_temperature": 1.0, "top_p": 0.95, "stream": true }, @@ -36,6 +38,7 @@ "inference_params": { "max_tokens": 32000, "temperature": 0.7, + "max_temperature": 1.0, "top_p": 0.95, "stream": true }, diff --git a/extensions/engine-management-extension/models/nvidia.json b/extensions/engine-management-extension/models/nvidia.json index dfce9f8bc..cb6f9dec1 100644 --- a/extensions/engine-management-extension/models/nvidia.json +++ b/extensions/engine-management-extension/models/nvidia.json @@ -8,6 +8,7 @@ "inference_params": { "max_tokens": 1024, "temperature": 0.3, + "max_temperature": 1.0, "top_p": 1, "stream": false, "frequency_penalty": 0, diff --git a/extensions/engine-management-extension/models/openai.json b/extensions/engine-management-extension/models/openai.json index 8f59b42ea..5c1e70b5a 100644 --- a/extensions/engine-management-extension/models/openai.json +++ b/extensions/engine-management-extension/models/openai.json @@ -79,12 +79,7 @@ "description": "OpenAI o1 is a new model with complex reasoning", "format": "api", "inference_params": { - "max_tokens": 100000, - "temperature": 1, - "top_p": 1, - "stream": true, - "frequency_penalty": 0, - "presence_penalty": 0 + "max_tokens": 100000 }, "engine": "openai" }, @@ -97,11 +92,7 @@ "format": "api", "inference_params": { "max_tokens": 32768, - "temperature": 1, - "top_p": 1, - "stream": true, - "frequency_penalty": 0, - "presence_penalty": 0 + "stream": true }, "engine": "openai" }, @@ -114,11 +105,20 @@ "format": "api", "inference_params": { "max_tokens": 65536, - "temperature": 1, - "top_p": 1, - "stream": true, - "frequency_penalty": 0, - "presence_penalty": 0 + "stream": true + }, + "engine": "openai" + }, + { + "model": "o3-mini", + "object": "model", + "name": "OpenAI o3-mini", + "version": "1.0", + "description": "OpenAI most recent reasoning model, providing high intelligence at the same cost and latency targets of o1-mini.", + "format": "api", + "inference_params": { + "max_tokens": 100000, + "stream": true }, "engine": "openai" } diff --git a/extensions/engine-management-extension/models/openrouter.json b/extensions/engine-management-extension/models/openrouter.json index b9714bb57..bf132533c 100644 --- a/extensions/engine-management-extension/models/openrouter.json +++ b/extensions/engine-management-extension/models/openrouter.json @@ -1,16 +1,91 @@ [ { - "model": "open-router-auto", + "model": "deepseek/deepseek-r1:free", "object": "model", - "name": "OpenRouter", + "name": "DeepSeek: R1", "version": "1.0", - "description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.", + "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.", "inference_params": { - "max_tokens": 128000, "temperature": 
0.7, "top_p": 0.95, "frequency_penalty": 0, - "presence_penalty": 0 + "presence_penalty": 0, + "stream": true + }, + "engine": "openrouter" + }, + { + "model": "deepseek/deepseek-r1-distill-llama-70b:free", + "object": "model", + "name": "DeepSeek: R1 Distill Llama 70B", + "version": "1.0", + "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.", + "inference_params": { + "temperature": 0.7, + "top_p": 0.95, + "frequency_penalty": 0, + "presence_penalty": 0, + "stream": true + }, + "engine": "openrouter" + }, + { + "model": "meta-llama/llama-3.1-405b-instruct:free", + "object": "model", + "name": "Meta: Llama 3.1 405B Instruct", + "version": "1.0", + "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.", + "inference_params": { + "temperature": 0.7, + "top_p": 0.95, + "frequency_penalty": 0, + "presence_penalty": 0, + "stream": true + }, + "engine": "openrouter" + }, + { + "model": "qwen/qwen-vl-plus:free", + "object": "model", + "name": "Qwen: Qwen VL Plus", + "version": "1.0", + "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.", + "inference_params": { + "temperature": 0.7, + "top_p": 0.95, + "frequency_penalty": 0, + "presence_penalty": 0, + "stream": true + }, + "engine": "openrouter" + }, + { + "model": "qwen/qwen2.5-vl-72b-instruct:free", + "object": "model", + "name": "Qwen: Qwen2.5 VL 72B Instruct", + "version": "1.0", + "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.", + "inference_params": { + "temperature": 0.7, + "top_p": 0.95, + "frequency_penalty": 0, + "presence_penalty": 0, + "stream": true }, "engine": "openrouter" } diff --git a/extensions/engine-management-extension/package.json b/extensions/engine-management-extension/package.json index 96f962ffd..571a3852b 100644 --- a/extensions/engine-management-extension/package.json +++ b/extensions/engine-management-extension/package.json @@ -29,12 +29,10 @@ }, "dependencies": { "@janhq/core": "../../core/package.tgz", - "cpu-instructions": "^0.0.13", "ky": "^1.7.2", "p-queue": "^8.0.1" }, "bundledDependencies": [ - "cpu-instructions", "@janhq/core" ], "engines": { diff --git a/extensions/engine-management-extension/resources/anthropic.json b/extensions/engine-management-extension/resources/anthropic.json index 12a3f08b8..4172bcd0b 100644 --- a/extensions/engine-management-extension/resources/anthropic.json +++ b/extensions/engine-management-extension/resources/anthropic.json @@ -1,5 +1,5 @@ { - "id": "@janhq/inference-anthropic-extension", + "id": "anthropic", "type": "remote", "engine": "anthropic", "url": "https://console.anthropic.com/settings/keys", @@ -10,13 +10,14 @@ "transform_req": { "chat_completions": { "url": 
"https://api.anthropic.com/v1/messages", - "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": \"{{ input_request.messages.0.content }}\", \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": \"{{ message.role}}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }" + "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": {{ tojson(input_request.messages.0.content) }}, \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"metadata\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }" } }, "transform_resp": { "chat_completions": { - "template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": 
null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": \"{{ input_request.model }}\", \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"{{ input_request.role }}\", \"content\": {% if input_request.content and input_request.content.0.type == \"text\" %} \"{{input_request.content.0.text}}\" {% else %} null {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.stop_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.usage.input_tokens }}, \"completion_tokens\": {{ input_request.usage.output_tokens }}, \"total_tokens\": {{ input_request.usage.input_tokens + input_request.usage.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 }, \"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}" + "template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": {{ tojson(input_request.delta.text) }} {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {{tojson(input_request)}} {% endif %}" } - } + }, + "explore_models_url": "https://docs.anthropic.com/en/docs/about-claude/models" } }
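
The two Anthropic templates above are Inja transforms: the request side reshapes an OpenAI-style chat payload into Anthropic's schema (a leading system message is hoisted into the top-level `system` field, and empty message content is replaced with a `"."` placeholder), while the response side re-wraps Anthropic events as OpenAI chat completions. A plain-TypeScript sketch of the request-side logic, for reference only (the `Message` shape is an assumption for the example, not a type from this codebase):

```ts
// Sketch of the request transform above: hoist a leading system message into
// Anthropic's top-level `system` field and replace empty message content with
// the "." placeholder the template uses.
interface Message {
  role: string
  content: string
}

function toAnthropicBody(req: { messages: Message[] }) {
  const [head, ...rest] = req.messages
  const hoistSystem = head !== undefined && head.role === 'system'
  const messages = (hoistSystem ? rest : req.messages).map((m) => ({
    role: m.role,
    // Anthropic rejects empty content, hence the "." fallback
    content: m.content && m.content !== '' ? m.content : '.',
  }))
  return hoistSystem ? { system: head.content, messages } : { messages }
}
```

Emitting each value with `tojson(...)` instead of interpolating it between hand-written quotes is the point of these template changes: quotes and newlines inside message content can no longer produce invalid JSON.
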
\"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}],{% endif %}\"message\": \"{{ last(input_request.messages).content }}\" {% endif %}{% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} \"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }" + "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"preamble\": {{ tojson(input_request.messages.0.content) }}, {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_first and not loop.is_last %} {\"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}], {% endif %} \"message\": \"{{ last(input_request.messages).content }}\" {% else %} {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_last %} { \"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}],{% endif %}\"message\": \"{{ last(input_request.messages).content }}\" {% endif %}{% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} \"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }" } }, "transform_resp": { "chat_completions": { "template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.event_type == \"text-generation\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.text }}\" {% else %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.event_type == \"stream-end\" %} \"finish_reason\": \"{{ input_request.finish_reason }}\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.generation_id }}\", 
\"created\": null, \"object\": \"chat.completion\", \"model\": {% if input_request.model %} \"{{ input_request.model }}\" {% else %} \"command-r-plus-08-2024\" {% endif %}, \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"assistant\", \"content\": {% if not input_request.text %} null {% else %} \"{{ input_request.text }}\" {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.finish_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.meta.tokens.input_tokens }}, \"completion_tokens\": {{ input_request.meta.tokens.output_tokens }},\"total_tokens\": {{ input_request.meta.tokens.input_tokens + input_request.meta.tokens.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 },\"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}" } - } + }, + "explore_models_url": "https://docs.cohere.com/v2/docs/models" } } diff --git a/extensions/engine-management-extension/resources/deepseek.json b/extensions/engine-management-extension/resources/deepseek.json new file mode 100644 index 000000000..214ec3b23 --- /dev/null +++ b/extensions/engine-management-extension/resources/deepseek.json @@ -0,0 +1,23 @@ +{ + "id": "deepseek", + "type": "remote", + "engine": "deepseek", + "url": "https://platform.deepseek.com/api_keys", + "api_key": "", + "metadata": { + "get_models_url": "https://api.deepseek.com/models", + "header_template": "Authorization: Bearer {{api_key}}", + "transform_req": { + "chat_completions": { + "url": "https://api.deepseek.com/chat/completions", + "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + } + }, + "transform_resp": { + "chat_completions": { + "template": "{{tojson(input_request)}}" + } + }, + "explore_models_url": "https://api-docs.deepseek.com/quick_start/pricing" + } +} diff --git a/extensions/engine-management-extension/resources/google_gemini.json b/extensions/engine-management-extension/resources/google_gemini.json new file mode 100644 index 000000000..e0fa809a5 --- /dev/null +++ b/extensions/engine-management-extension/resources/google_gemini.json @@ -0,0 +1,23 @@ +{ + "id": "google_gemini", + "type": "remote", + "engine": "google_gemini", + "url": "https://aistudio.google.com/apikey", + "api_key": "", + "metadata": { + "get_models_url": "https://generativelanguage.googleapis.com/v1beta/models", + "header_template": "Authorization: Bearer {{api_key}}", + "transform_req": { + "chat_completions": { + "url": "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions", + "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key 
== \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + } + }, + "transform_resp": { + "chat_completions": { + "template": "{{tojson(input_request)}}" + } + }, + "explore_models_url": "https://ai.google.dev/gemini-api/docs/models/gemini" + } +} diff --git a/extensions/engine-management-extension/resources/groq.json b/extensions/engine-management-extension/resources/groq.json index 60d553a92..87d215ab2 100644 --- a/extensions/engine-management-extension/resources/groq.json +++ b/extensions/engine-management-extension/resources/groq.json @@ -1,5 +1,5 @@ { - "id": "@janhq/inference-groq-extension", + "id": "groq", "type": "remote", "engine": "groq", "url": "https://console.groq.com/keys", @@ -15,8 +15,9 @@ }, "transform_resp": { "chat_completions": { - "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + "template": "{{tojson(input_request)}}" } - } + }, + "explore_models_url": "https://console.groq.com/docs/models" } } diff --git a/extensions/engine-management-extension/resources/martian.json b/extensions/engine-management-extension/resources/martian.json index 3a65f3981..3fd458660 100644 --- a/extensions/engine-management-extension/resources/martian.json +++ b/extensions/engine-management-extension/resources/martian.json @@ -1,5 +1,5 @@ { - "id": "@janhq/inference-martian-extension", + "id": "martian", "type": "remote", "engine": "martian", "url": "https://withmartian.com/dashboard", @@ -15,8 +15,9 @@ }, "transform_resp": { "chat_completions": { - "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + "template": "{{tojson(input_request)}}" } - } + }, + "explore_models_url": "https://withmartian.github.io/llm-adapters/" } } diff --git a/extensions/engine-management-extension/resources/mistral.json b/extensions/engine-management-extension/resources/mistral.json index 3f447dc4c..4a24471a2 100644 --- a/extensions/engine-management-extension/resources/mistral.json +++ b/extensions/engine-management-extension/resources/mistral.json @@ -1,5 +1,5 @@ { - "id": "@janhq/inference-mistral-extension", + "id": "mistral", "type": "remote", "engine": "mistral", "url": "https://console.mistral.ai/api-keys/", @@ -17,6 +17,7 @@ "chat_completions": { "template": "{{tojson(input_request)}}" } - } + }, + "explore_models_url": 
"https://docs.mistral.ai/getting-started/models/models_overview/" } } diff --git a/extensions/engine-management-extension/resources/nvidia.json b/extensions/engine-management-extension/resources/nvidia.json index 240130090..573bad4f6 100644 --- a/extensions/engine-management-extension/resources/nvidia.json +++ b/extensions/engine-management-extension/resources/nvidia.json @@ -1,5 +1,5 @@ { - "id": "@janhq/inference-nvidia-extension", + "id": "nvidia", "type": "remote", "engine": "nvidia", "url": "https://org.ngc.nvidia.com/setup/personal-keys", @@ -15,8 +15,9 @@ }, "transform_resp": { "chat_completions": { - "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + "template": "{{tojson(input_request)}}" } - } + }, + "explore_models_url": "https://build.nvidia.com/models" } } diff --git a/extensions/engine-management-extension/resources/openai.json b/extensions/engine-management-extension/resources/openai.json index 97effd42a..42f421072 100644 --- a/extensions/engine-management-extension/resources/openai.json +++ b/extensions/engine-management-extension/resources/openai.json @@ -1,5 +1,5 @@ { - "id": "@janhq/inference-openai-extension", + "id": "openai", "type": "remote", "engine": "openai", "url": "https://platform.openai.com/account/api-keys", @@ -10,13 +10,14 @@ "transform_req": { "chat_completions": { "url": "https://api.openai.com/v1/chat/completions", - "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or ((input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and (key == \"stop\")) %} {% if not first %} , {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %} , {% endif %} {% endif %} {% endfor %}] {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }" + "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == 
\"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or ((input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") and (key == \"stop\")) %} {% if not first %} , {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %} , {% endif %} {% endif %} {% endfor %}] {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% set first = false %} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }" } }, "transform_resp": { "chat_completions": { - "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + "template": "{{tojson(input_request)}}" } - } + }, + "explore_models_url": "https://platform.openai.com/docs/models" } } diff --git a/extensions/engine-management-extension/resources/openrouter.json b/extensions/engine-management-extension/resources/openrouter.json index 45dc48414..798199708 100644 --- a/extensions/engine-management-extension/resources/openrouter.json +++ b/extensions/engine-management-extension/resources/openrouter.json @@ -1,5 +1,5 @@ { - "id": "@janhq/inference-openrouter-extension", + "id": "openrouter", "type": "remote", "engine": "openrouter", "url": "https://openrouter.ai/keys", @@ -10,13 +10,14 @@ "transform_req": { "chat_completions": { "url": "https://openrouter.ai/api/v1/chat/completions", - "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or 
key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" } }, "transform_resp": { "chat_completions": { - "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + "template": "{{tojson(input_request)}}" } - } + }, + "explore_models_url": "https://openrouter.ai/models" } } diff --git a/extensions/engine-management-extension/rolldown.config.mjs b/extensions/engine-management-extension/rolldown.config.mjs index 1290338db..d89fd9bfa 100644 --- a/extensions/engine-management-extension/rolldown.config.mjs +++ b/extensions/engine-management-extension/rolldown.config.mjs @@ -13,9 +13,19 @@ export default defineConfig([ NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`), API_URL: JSON.stringify('http://127.0.0.1:39291'), SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), + PLATFORM: JSON.stringify(process.platform), CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'), DEFAULT_REMOTE_ENGINES: JSON.stringify(engines), DEFAULT_REMOTE_MODELS: JSON.stringify(models), + DEFAULT_REQUEST_PAYLOAD_TRANSFORM: JSON.stringify( + `{ {% set first = true %} {% for key, value in input_request %} {% if key == "messages" or key == "model" or key == "temperature" or key == "store" or key == "max_tokens" or key == "stream" or key == "presence_penalty" or key == "metadata" or key == "frequency_penalty" or key == "tools" or key == "tool_choice" or key == "logprobs" or key == "top_logprobs" or key == "logit_bias" or key == "n" or key == "modalities" or key == "prediction" or key == "response_format" or key == "service_tier" or key == "seed" or key == "stop" or key == "stream_options" or key == "top_p" or key == "parallel_tool_calls" or key == "user" %} {% if not first %},{% endif %} "{{ key }}": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }` + ), + DEFAULT_RESPONSE_BODY_TRANSFORM: JSON.stringify( + '{{tojson(input_request)}}' + ), + DEFAULT_REQUEST_HEADERS_TRANSFORM: JSON.stringify( + 'Authorization: Bearer {{api_key}}' + ), }, }, { @@ -29,15 +39,4 @@ export default defineConfig([ CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'), }, }, - { - input: 'src/node/cpuInfo.ts', - output: { - format: 'cjs', - file: 'dist/node/cpuInfo.js', - }, - external: ['cpu-instructions'], - resolve: { - extensions: ['.ts', '.js', '.svg'], - }, - }, ]) diff --git a/extensions/engine-management-extension/src/@types/global.d.ts b/extensions/engine-management-extension/src/@types/global.d.ts index 2d520d5f9..55874ea9a 100644 --- a/extensions/engine-management-extension/src/@types/global.d.ts +++ b/extensions/engine-management-extension/src/@types/global.d.ts @@ -1,7 +1,11 @@ declare const API_URL: string 
declare const CORTEX_ENGINE_VERSION: string +declare const PLATFORM: string declare const SOCKET_URL: string declare const NODE: string +declare const DEFAULT_REQUEST_PAYLOAD_TRANSFORM: string +declare const DEFAULT_RESPONSE_BODY_TRANSFORM: string +declare const DEFAULT_REQUEST_HEADERS_TRANSFORM: string declare const DEFAULT_REMOTE_ENGINES: ({ id: string diff --git a/extensions/engine-management-extension/src/index.ts b/extensions/engine-management-extension/src/index.ts index 0d30bf4ea..e2730cc71 100644 --- a/extensions/engine-management-extension/src/index.ts +++ b/extensions/engine-management-extension/src/index.ts @@ -19,12 +19,16 @@ import ky, { HTTPError } from 'ky' import PQueue from 'p-queue' import { EngineError } from './error' import { getJanDataFolderPath } from '@janhq/core' +import { engineVariant } from './utils' +interface ModelList { + data: Model[] +} /** - * JSONEngineManagementExtension is a EngineManagementExtension implementation that provides + * JanEngineManagementExtension is an EngineManagementExtension implementation that provides * functionality for managing engines. */ -export default class JSONEngineManagementExtension extends EngineManagementExtension { +export default class JanEngineManagementExtension extends EngineManagementExtension { queue = new PQueue({ concurrency: 1 }) /** @@ -63,13 +67,12 @@ export default class JSONEngineManagementExten * @returns A Promise that resolves to the list of remote models. */ async getRemoteModels(name: string): Promise<ModelList> { - return this.queue.add(() => - ky - .get(`${API_URL}/v1/models/remote/${name}`) - .json() - .then((e) => e) - .catch(() => []) - ) as Promise<ModelList> + return ky + .get(`${API_URL}/v1/models/remote/${name}`) + .json() + .catch(() => ({ + data: [], + })) as Promise<ModelList> } /** @@ -138,9 +141,38 @@ export default class JSONEngineManagementExten * Add a new remote engine * @returns A Promise that resolves when the engine is installed. */ - async addRemoteEngine(engineConfig: EngineConfig) { + async addRemoteEngine( + engineConfig: EngineConfig, + persistModels: boolean = true + ) { + // Populate default settings + if ( + engineConfig.metadata?.transform_req?.chat_completions && + !engineConfig.metadata.transform_req.chat_completions.template + ) + engineConfig.metadata.transform_req.chat_completions.template = + DEFAULT_REQUEST_PAYLOAD_TRANSFORM + + if ( + engineConfig.metadata?.transform_resp?.chat_completions && + !engineConfig.metadata.transform_resp.chat_completions?.template + ) + engineConfig.metadata.transform_resp.chat_completions.template = + DEFAULT_RESPONSE_BODY_TRANSFORM + + if (engineConfig.metadata && !engineConfig.metadata?.header_template) + engineConfig.metadata.header_template = DEFAULT_REQUEST_HEADERS_TRANSFORM + return this.queue.add(() => - ky.post(`${API_URL}/v1/engines`, { json: engineConfig }).then((e) => e) + ky.post(`${API_URL}/v1/engines`, { json: engineConfig }).then((e) => { + if (persistModels && engineConfig.metadata?.get_models_url) { + // Pull /models from remote models endpoint + return this.populateRemoteModels(engineConfig) + .then(() => e) + .catch(() => e) + } + return e + }) ) as Promise<{ messages: string }> } @@ -161,9 +193,25 @@ export default class JSONEngineManagementExten * @param model - Remote model object. 
*/ async addRemoteModel(model: Model) { - return this.queue.add(() => - ky.post(`${API_URL}/v1/models/add`, { json: model }).then((e) => e) - ) + return this.queue + .add(() => + ky + .post(`${API_URL}/v1/models/add`, { + json: { + inference_params: { + max_tokens: 4096, + temperature: 0.7, + top_p: 0.95, + stream: true, + frequency_penalty: 0, + presence_penalty: 0, + }, + ...model, + }, + }) + .then((e) => e) + ) + .then(() => {}) } /** @@ -245,11 +293,7 @@ export default class JSONEngineManagementExtension extends EngineManagementExten error instanceof EngineError ) { const systemInfo = await systemInformation() - const variant = await executeOnMain( - NODE, - 'engineVariant', - systemInfo.gpuSetting - ) + const variant = await engineVariant(systemInfo.gpuSetting) await this.setDefaultEngineVariant(InferenceEngine.cortex_llamacpp, { variant: variant, version: `${CORTEX_ENGINE_VERSION}`, @@ -293,14 +337,40 @@ export default class JSONEngineManagementExtension extends EngineManagementExten data.api_key = api_key /// END - Migrate legacy api key settings - await this.addRemoteEngine(data).catch(console.error) + await this.addRemoteEngine(data, false).catch(console.error) }) ) events.emit(EngineEvent.OnEngineUpdate, {}) - DEFAULT_REMOTE_MODELS.forEach(async (data: Model) => { - await this.addRemoteModel(data).catch(() => {}) - }) + await Promise.all( + DEFAULT_REMOTE_MODELS.map((data: Model) => + this.addRemoteModel(data).catch(() => {}) + ) + ) events.emit(ModelEvent.OnModelsUpdate, { fetch: true }) } } + + /** + * Pulls models list from the remote provider and persist + * @param engineConfig + * @returns + */ + private populateRemoteModels = async (engineConfig: EngineConfig) => { + return this.getRemoteModels(engineConfig.engine) + .then((models: ModelList) => { + if (models?.data) + Promise.all( + models.data.map((model) => + this.addRemoteModel({ + ...model, + engine: engineConfig.engine as InferenceEngine, + model: model.model ?? model.id, + }).catch(console.info) + ) + ).then(() => { + events.emit(ModelEvent.OnModelsUpdate, { fetch: true }) + }) + }) + .catch(console.info) + } } diff --git a/extensions/engine-management-extension/src/node/cpuInfo.ts b/extensions/engine-management-extension/src/node/cpuInfo.ts deleted file mode 100644 index 4366a995b..000000000 --- a/extensions/engine-management-extension/src/node/cpuInfo.ts +++ /dev/null @@ -1,27 +0,0 @@ -import { cpuInfo } from 'cpu-instructions' - -// Check the CPU info and determine the supported instruction set -const info = cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512') - ? 'avx512' - : cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX2') - ? 'avx2' - : cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX') - ? 
'avx' - : 'noavx' - -// Send the result and wait for confirmation before exiting -new Promise((resolve, reject) => { - // @ts-ignore - process.send(info, (error: Error | null) => { - if (error) { - reject(error) - } else { - resolve() - } - }) -}) - .then(() => process.exit(0)) - .catch((error) => { - console.error('Failed to send info:', error) - process.exit(1) - }) diff --git a/extensions/engine-management-extension/src/node/index.test.ts b/extensions/engine-management-extension/src/node/index.test.ts index c73feb9c6..aa2ac8be8 100644 --- a/extensions/engine-management-extension/src/node/index.test.ts +++ b/extensions/engine-management-extension/src/node/index.test.ts @@ -1,7 +1,6 @@ import { describe, expect, it } from '@jest/globals' import engine from './index' -import { GpuSetting } from '@janhq/core/node' -import { cpuInfo } from 'cpu-instructions' +import { GpuSetting } from '@janhq/core' import { fork } from 'child_process' let testSettings: GpuSetting = { @@ -23,22 +22,12 @@ let testSettings: GpuSetting = { } const originalPlatform = process.platform -jest.mock('cpu-instructions', () => ({ - cpuInfo: { - cpuInfo: jest.fn(), - }, -})) -let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock -mockCpuInfo.mockReturnValue([]) -jest.mock('@janhq/core/node', () => ({ + +jest.mock('@janhq/core', () => ({ appResourcePath: () => '.', log: jest.fn(), })) -jest.mock('child_process', () => ({ - fork: jest.fn(), -})) -const mockFork = fork as jest.Mock describe('test executable cortex file', () => { afterAll(function () { @@ -48,14 +37,7 @@ describe('test executable cortex file', () => { }) it('executes on MacOS', () => { - const mockProcess = { - on: jest.fn((event, callback) => { - if (event === 'message') { - callback('noavx') - } - }), - send: jest.fn(), - } + Object.defineProperty(process, 'platform', { value: 'darwin', }) @@ -63,7 +45,7 @@ describe('test executable cortex file', () => { value: 'arm64', }) - mockFork.mockReturnValue(mockProcess) + expect(engine.engineVariant(testSettings)).resolves.toEqual('mac-arm64') }) @@ -83,7 +65,7 @@ describe('test executable cortex file', () => { }), send: jest.fn(), } - mockFork.mockReturnValue(mockProcess) + Object.defineProperty(process, 'arch', { value: 'x64', }) @@ -107,7 +89,6 @@ describe('test executable cortex file', () => { }), send: jest.fn(), } - mockFork.mockReturnValue(mockProcess) expect(engine.engineVariant()).resolves.toEqual('windows-amd64-avx') }) @@ -145,7 +126,6 @@ describe('test executable cortex file', () => { }), send: jest.fn(), } - mockFork.mockReturnValue(mockProcess) expect(engine.engineVariant(settings)).resolves.toEqual( 'windows-amd64-avx2-cuda-11-7' @@ -176,26 +156,11 @@ describe('test executable cortex file', () => { }, ], } - mockFork.mockReturnValue({ - on: jest.fn((event, callback) => { - if (event === 'message') { - callback('noavx') - } - }), - send: jest.fn(), - }) expect(engine.engineVariant(settings)).resolves.toEqual( 'windows-amd64-noavx-cuda-12-0' ) - mockFork.mockReturnValue({ - on: jest.fn((event, callback) => { - if (event === 'message') { - callback('avx512') - } - }), - send: jest.fn(), - }) + expect(engine.engineVariant(settings)).resolves.toEqual( 'windows-amd64-avx2-cuda-12-0' ) @@ -209,14 +174,6 @@ describe('test executable cortex file', () => { ...testSettings, run_mode: 'cpu', } - mockFork.mockReturnValue({ - on: jest.fn((event, callback) => { - if (event === 'message') { - callback('noavx') - } - }), - send: jest.fn(), - }) expect(engine.engineVariant()).resolves.toEqual('linux-amd64-noavx') }) @@ 
-245,16 +202,6 @@ describe('test executable cortex file', () => { }, ], } - - mockFork.mockReturnValue({ - on: jest.fn((event, callback) => { - if (event === 'message') { - callback('avx512') - } - }), - send: jest.fn(), - }) - expect(engine.engineVariant(settings)).resolves.toBe( 'linux-amd64-avx2-cuda-11-7' ) @@ -284,14 +231,7 @@ describe('test executable cortex file', () => { }, ], } - mockFork.mockReturnValue({ - on: jest.fn((event, callback) => { - if (event === 'message') { - callback('avx2') - } - }), - send: jest.fn(), - }) + expect(engine.engineVariant(settings)).resolves.toEqual( 'linux-amd64-avx2-cuda-12-0' @@ -310,15 +250,6 @@ describe('test executable cortex file', () => { const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { - mockFork.mockReturnValue({ - on: jest.fn((event, callback) => { - if (event === 'message') { - callback(instruction) - } - }), - send: jest.fn(), - }) - expect(engine.engineVariant(settings)).resolves.toEqual( `linux-amd64-${instruction}` ) @@ -335,14 +266,7 @@ describe('test executable cortex file', () => { } const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { - mockFork.mockReturnValue({ - on: jest.fn((event, callback) => { - if (event === 'message') { - callback(instruction) - } - }), - send: jest.fn(), - }) + expect(engine.engineVariant(settings)).resolves.toEqual( `windows-amd64-${instruction}` ) @@ -376,14 +300,7 @@ describe('test executable cortex file', () => { } const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { - mockFork.mockReturnValue({ - on: jest.fn((event, callback) => { - if (event === 'message') { - callback(instruction) - } - }), - send: jest.fn(), - }) + expect(engine.engineVariant(settings)).resolves.toEqual( `windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0` ) @@ -417,14 +334,7 @@ describe('test executable cortex file', () => { ], } cpuInstructions.forEach((instruction) => { - mockFork.mockReturnValue({ - on: jest.fn((event, callback) => { - if (event === 'message') { - callback(instruction) - } - }), - send: jest.fn(), - }) + expect(engine.engineVariant(settings)).resolves.toEqual( `linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0` ) @@ -459,14 +369,7 @@ describe('test executable cortex file', () => { ], } cpuInstructions.forEach((instruction) => { - mockFork.mockReturnValue({ - on: jest.fn((event, callback) => { - if (event === 'message') { - callback(instruction) - } - }), - send: jest.fn(), - }) + expect(engine.engineVariant(settings)).resolves.toEqual( `linux-amd64-vulkan` ) diff --git a/extensions/engine-management-extension/src/node/index.ts b/extensions/engine-management-extension/src/node/index.ts index 31ad90ed2..4c1daf998 100644 --- a/extensions/engine-management-extension/src/node/index.ts +++ b/extensions/engine-management-extension/src/node/index.ts @@ -2,111 +2,10 @@ import * as path from 'path' import { appResourcePath, getJanDataFolderPath, - GpuSetting, log, } from '@janhq/core/node' -import { fork } from 'child_process' import { mkdir, readdir, symlink } from 'fs/promises' -/** - * The GPU runMode that will be set - either 'vulkan', 'cuda', or empty for cpu. 
- * @param settings - * @returns - */ -const gpuRunMode = (settings?: GpuSetting): string => { - if (process.platform === 'darwin') - // MacOS now has universal binaries - return '' - - if (!settings) return '' - - return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda' -} - -/** - * The OS & architecture that the current process is running on. - * @returns win, mac-x64, mac-arm64, or linux - */ -const os = (): string => { - return process.platform === 'win32' - ? 'windows-amd64' - : process.platform === 'darwin' - ? process.arch === 'arm64' - ? 'mac-arm64' - : 'mac-amd64' - : 'linux-amd64' -} - -/** - * The CUDA version that will be set - either '11-7' or '12-0'. - * @param settings - * @returns - */ -const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => { - const isUsingCuda = - settings?.vulkan !== true && - settings?.run_mode === 'gpu' && - !os().includes('mac') - - if (!isUsingCuda) return undefined - return settings?.cuda?.version === '11' ? '11-7' : '12-0' -} - -/** - * The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'. - * @returns - */ -const cpuInstructions = async (): Promise<string> => { - if (process.platform === 'darwin') return '' - - const child = fork(path.join(__dirname, './cpuInfo.js')) // Path to the child process file - - return new Promise<string>((resolve, reject) => { - child.on('message', (cpuInfo?: string) => { - resolve(cpuInfo ?? 'noavx') - child.kill() // Kill the child process after receiving the result - }) - - child.on('error', (err) => { - resolve('noavx') - child.kill() - }) - - child.on('exit', (code) => { - if (code !== 0) { - resolve('noavx') - child.kill() - } - }) - }) -} - -/** - * Find which variant to run based on the current platform. - */ -const engineVariant = async (gpuSetting?: GpuSetting): Promise<string> => { - const cpuInstruction = await cpuInstructions() - log(`[CORTEX]: CPU instruction: ${cpuInstruction}`) - let engineVariant = [ - os(), - gpuSetting?.vulkan - ? 'vulkan' - : gpuRunMode(gpuSetting) !== 'cuda' - ? // CPU mode - support all variants - cpuInstruction - : // GPU mode - packaged CUDA variants of avx2 and noavx - cpuInstruction === 'avx2' || cpuInstruction === 'avx512' - ? 'avx2' - : 'noavx', - gpuRunMode(gpuSetting), - cudaVersion(gpuSetting), - ] - .filter((e) => !!e) - .join('-') - - log(`[CORTEX]: Engine variant: ${engineVariant}`) - return engineVariant -} /** * Create symlink to each variant for the default bundled version @@ -148,6 +47,5 @@ const symlinkEngines = async () => { } export default { - engineVariant, symlinkEngines, } diff --git a/extensions/engine-management-extension/src/utils.ts b/extensions/engine-management-extension/src/utils.ts new file mode 100644 index 000000000..30d482313 --- /dev/null +++ b/extensions/engine-management-extension/src/utils.ts @@ -0,0 +1,86 @@ +import { GpuSetting, log } from '@janhq/core' + +/** + * The GPU runMode that will be set - either 'vulkan', 'cuda', or empty for cpu. + * @param settings + * @returns + */ + +const gpuRunMode = (settings?: GpuSetting): string => { + return settings?.gpus?.some( + (gpu) => + gpu.activated === true && + gpu.additional_information && + gpu.additional_information.driver_version + ) + ? 'cuda' + : '' +} + +/** + * The OS & architecture that the current process is running on. + * @returns windows-amd64, mac-arm64, mac-amd64, or linux-amd64 + */ +const os = (settings?: GpuSetting): string => { + return PLATFORM === 'win32' + ? 'windows-amd64' + : PLATFORM === 'darwin' + ? settings?.cpu?.arch === 'arm64' + ? 'mac-arm64' + : 'mac-amd64' + : 'linux-amd64' +} + +/** + * The CUDA version that will be set - either '11-7' or '12-0'. + * @param settings + * @returns + */ +const cudaVersion = (settings?: GpuSetting): '12-0' | '11-7' | undefined => { + const isUsingCuda = + settings?.vulkan !== true && + settings?.gpus?.some((gpu) => gpu.activated === true) && + !os().includes('mac') + + if (!isUsingCuda) return undefined + return settings.gpus?.some((gpu) => gpu.version?.includes('12')) + ? '12-0' + : '11-7' +} + +/** + * Find which variant to run based on the current platform. + */ +export const engineVariant = async ( + gpuSetting?: GpuSetting +): Promise<string> => { + const platform = os(gpuSetting) + + // There is no need to append the variant extension for mac + if (platform.startsWith('mac')) return platform + + let engineVariant = + gpuSetting?.vulkan || gpuSetting?.gpus?.some((e) => !e.additional_information) + ? [platform, 'vulkan'] + : [ + platform, + gpuRunMode(gpuSetting) === 'cuda' && + (gpuSetting?.cpu?.instructions?.includes('avx2') || + gpuSetting?.cpu?.instructions?.includes('avx512')) + ? 'avx2' + : 'noavx', + gpuRunMode(gpuSetting), + cudaVersion(gpuSetting), + ].filter(Boolean) // Remove any falsy values + + let engineVariantString = engineVariant.join('-') + + log(`[CORTEX]: Engine variant: ${engineVariantString}`) + return engineVariantString +} diff --git a/extensions/hardware-management-extension/jest.config.js b/extensions/hardware-management-extension/jest.config.js new file mode 100644 index 000000000..8bb37208d --- /dev/null +++ b/extensions/hardware-management-extension/jest.config.js @@ -0,0 +1,5 @@ +/** @type {import('ts-jest').JestConfigWithTsJest} */ +module.exports = { + preset: 'ts-jest', + testEnvironment: 'node', +} diff --git a/extensions/hardware-management-extension/package.json b/extensions/hardware-management-extension/package.json new file mode 100644 index 000000000..ec98c7440 --- /dev/null +++ b/extensions/hardware-management-extension/package.json @@ -0,0 +1,48 @@ +{ + "name": "@janhq/hardware-management-extension", + "productName": "Hardware Management", + "version": "1.0.0", + "description": "Manages hardware settings.", + "main": "dist/index.js", + "node": "dist/node/index.cjs.js", + "author": "Jan ", + "license": "MIT", + "scripts": { + "test": "jest", + "build": "rolldown -c rolldown.config.mjs", + "codesign:darwin": "../../.github/scripts/auto-sign.sh", + "codesign:win32:linux": "echo 'No codesigning required'", + "codesign": "run-script-os", + "build:publish": "rimraf *.tgz --glob || true && yarn build && yarn codesign && npm pack && cpx *.tgz ../../pre-install" + }, + "exports": { + ".": "./dist/index.js", + "./main": "./dist/module.js" + }, + "devDependencies": { + "cpx": "^1.5.0", + "rimraf": "^3.0.2", + "rolldown": "^1.0.0-beta.1", + "run-script-os": "^1.1.6", + "ts-loader": "^9.5.0", + "typescript": "^5.3.3" + }, + "dependencies": { + "@janhq/core": "../../core/package.tgz", + "cpu-instructions": "^0.0.13", + "ky": "^1.7.2", + "p-queue": "^8.0.1" + }, + "bundledDependencies": [ + "cpu-instructions", + "@janhq/core" + ], + "engines": { + "node": ">=18.0.0" + }, + "files": [ + "dist/*", + "package.json", + "README.md" + ] +}
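
For reference, `engineVariant` from `src/utils.ts` above resolves to strings such as `mac-arm64`, `linux-amd64-vulkan`, or `windows-amd64-avx2-cuda-12-0`, matching the expectations in `index.test.ts`. A usage sketch with assumed settings values (only the fields the helper actually reads are filled in):

```ts
import { engineVariant } from './utils'

// Hypothetical GpuSetting: just the fields engineVariant, gpuRunMode, and
// cudaVersion read; the real @janhq/core type declares more than this.
const settings: any = {
  vulkan: false,
  cpu: { arch: 'x64', instructions: ['avx2'] },
  gpus: [
    {
      activated: true,
      version: '12.2', // matched against '12' to pick the cuda-12-0 build
      additional_information: { driver_version: '535.104' },
    },
  ],
}

// Resolves to 'linux-amd64-avx2-cuda-12-0' on Linux x64,
// 'windows-amd64-avx2-cuda-12-0' on Windows, and 'mac-amd64' on an Intel mac
// (where the GPU fields are ignored), as asserted in index.test.ts.
engineVariant(settings).then((variant) => console.log(variant))
```
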
diff --git a/extensions/hardware-management-extension/rolldown.config.mjs b/extensions/hardware-management-extension/rolldown.config.mjs new file mode 100644 index 000000000..7982ca555 --- /dev/null +++ b/extensions/hardware-management-extension/rolldown.config.mjs @@ -0,0 +1,17 @@ +import { defineConfig } from 'rolldown' +import pkgJson from './package.json' with { type: 'json' } + +export default defineConfig([ + { + input: 'src/index.ts', + output: { + format: 'esm', + file: 'dist/index.js', + }, + define: { + NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`), + API_URL: JSON.stringify('http://127.0.0.1:39291'), + SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), + }, + }, +]) diff --git a/extensions/hardware-management-extension/src/@types/global.d.ts b/extensions/hardware-management-extension/src/@types/global.d.ts new file mode 100644 index 000000000..6639b9cbb --- /dev/null +++ b/extensions/hardware-management-extension/src/@types/global.d.ts @@ -0,0 +1,12 @@ +declare const API_URL: string +declare const SOCKET_URL: string +declare const NODE: string + +interface Core { + api: APIFunctions + events: EventEmitter +} +interface Window { + core?: Core | undefined + electronAPI?: any | undefined +} diff --git a/extensions/hardware-management-extension/src/index.ts b/extensions/hardware-management-extension/src/index.ts new file mode 100644 index 000000000..c2edc6159 --- /dev/null +++ b/extensions/hardware-management-extension/src/index.ts @@ -0,0 +1,67 @@ +import { + executeOnMain, + HardwareManagementExtension, + HardwareInformation, +} from '@janhq/core' +import ky from 'ky' +import PQueue from 'p-queue' + +/** + * JSONHardwareManagementExtension is a HardwareManagementExtension implementation that provides + * functionality for managing hardware. + */ +export default class JSONHardwareManagementExtension extends HardwareManagementExtension { + queue = new PQueue({ concurrency: 1 }) + + /** + * Called when the extension is loaded. + */ + async onLoad() { + // Run Healthcheck + this.queue.add(() => this.healthz()) + } + + /** + * Called when the extension is unloaded. + */ + onUnload() {} + + /** + * Do health check on cortex.cpp + * @returns + */ + async healthz(): Promise<void> { + return ky + .get(`${API_URL}/healthz`, { + retry: { limit: 20, delay: () => 500, methods: ['get'] }, + }) + .then(() => {}) + } + + /** + * @returns A Promise that resolves to the hardware information. + */ + async getHardware(): Promise<HardwareInformation> { + return this.queue.add(() => + ky + .get(`${API_URL}/v1/hardware`) + .json() + .then((e) => e) + ) as Promise<HardwareInformation> + } + + /** + * @returns A Promise that resolves with the GPU activation result. + */ + async setActiveGpu(data: { gpus: number[] }): Promise<{ + message: string + activated_gpus: number[] + }> { + return this.queue.add(() => + ky.post(`${API_URL}/v1/hardware/activate`, { json: data }).then((e) => e) + ) as Promise<{ + message: string + activated_gpus: number[] + }> + } +}
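
The new extension is a thin client over cortex.cpp's hardware endpoints. A hedged usage sketch (how a caller obtains the loaded instance is assumed and not shown in this diff; the method names come from the class above, with the `setActiveGpu` spelling as corrected here):

```ts
import JSONHardwareManagementExtension from './index'

// Hypothetical caller: inspect the machine, then enable the first GPU.
async function activateFirstGpu(ext: JSONHardwareManagementExtension) {
  await ext.healthz() // block until cortex.cpp answers /healthz
  const hardware = await ext.getHardware() // GET /v1/hardware
  console.log('detected hardware:', hardware)
  // POST /v1/hardware/activate with the GPU indices to enable
  const result = await ext.setActiveGpu({ gpus: [0] })
  console.log(result.message, result.activated_gpus)
}
```
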
diff --git a/extensions/monitoring-extension/tsconfig.json b/extensions/hardware-management-extension/tsconfig.json similarity index 65% rename from extensions/monitoring-extension/tsconfig.json rename to extensions/hardware-management-extension/tsconfig.json index 2477d58ce..72e1e1895 100644 --- a/extensions/monitoring-extension/tsconfig.json +++ b/extensions/hardware-management-extension/tsconfig.json @@ -8,7 +8,9 @@ "forceConsistentCasingInFileNames": true, "strict": false, "skipLibCheck": true, - "rootDir": "./src" + "rootDir": "./src", + "resolveJsonModule": true }, - "include": ["./src"] + "include": ["./src"], + "exclude": ["src/**/*.test.ts", "rolldown.config.mjs"] } diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt index 53bd3ae76..7ee7020b3 100644 --- a/extensions/inference-cortex-extension/bin/version.txt +++ b/extensions/inference-cortex-extension/bin/version.txt @@ -1 +1 @@ -1.0.9-rc7 +1.0.10 diff --git a/extensions/inference-cortex-extension/package.json b/extensions/inference-cortex-extension/package.json index f191f3071..a4558dc8f 100644 --- a/extensions/inference-cortex-extension/package.json +++ b/extensions/inference-cortex-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-cortex-extension", "productName": "Cortex Inference Engine", - "version": "1.0.24", + "version": "1.0.25", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. 
See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", diff --git a/extensions/inference-cortex-extension/resources/default_settings.json b/extensions/inference-cortex-extension/resources/default_settings.json index a3a93f305..945f32729 100644 --- a/extensions/inference-cortex-extension/resources/default_settings.json +++ b/extensions/inference-cortex-extension/resources/default_settings.json @@ -76,7 +76,7 @@ }, { "key": "use_mmap", - "title": "MMAP", + "title": "mmap", "description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.", "controllerType": "checkbox", "controllerProps": { diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-70b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-70b/model.json new file mode 100644 index 000000000..7f98b07a1 --- /dev/null +++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-70b/model.json @@ -0,0 +1,35 @@ +{ + "sources": [ + { + "filename": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf", + "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf" + } + ], + "id": "deepseek-r1-distill-llama-70b", + "object": "model", + "name": "DeepSeek R1 Distill Llama 70B Q4", + "version": "1.0", + "description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.", + "format": "gguf", + "settings": { + "ctx_len": 131072, + "prompt_template": "<|User|> {prompt} <|Assistant|>", + "llama_model_path": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf", + "ngl": 81 + }, + "parameters": { + "temperature": 0.6, + "top_p": 0.95, + "stream": true, + "max_tokens": 131072, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "DeepSeek", + "tags": ["70B", "Featured"], + "size": 42500000000 + }, + "engine": "llama-cpp" +} diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-8b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-8b/model.json new file mode 100644 index 000000000..a3a075888 --- /dev/null +++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-8b/model.json @@ -0,0 +1,35 @@ +{ + "sources": [ + { + "filename": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf", + "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf" + } + ], + "id": "deepseek-r1-distill-llama-8b", + "object": "model", + "name": "DeepSeek R1 Distill Llama 8B Q5", + "version": "1.0", + "description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.", + "format": "gguf", + "settings": { + "ctx_len": 131072, + "prompt_template": "<|User|> {prompt} <|Assistant|>", + "llama_model_path": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf", + "ngl": 33 + }, + "parameters": { + "temperature": 0.6, + "top_p": 0.95, + "stream": true, + "max_tokens": 131072, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "DeepSeek", + "tags": ["8B", "Featured"], + "size": 5730000000 + }, + 
"engine": "llama-cpp" +} diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-1.5b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-1.5b/model.json new file mode 100644 index 000000000..74b3dfc54 --- /dev/null +++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-1.5b/model.json @@ -0,0 +1,35 @@ +{ + "sources": [ + { + "filename": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf", + "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf" + } + ], + "id": "deepseek-r1-distill-qwen-1.5b", + "object": "model", + "name": "DeepSeek R1 Distill Qwen 1.5B Q5", + "version": "1.0", + "description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.", + "format": "gguf", + "settings": { + "ctx_len": 131072, + "prompt_template": "<|User|> {prompt} <|Assistant|>", + "llama_model_path": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf", + "ngl": 29 + }, + "parameters": { + "temperature": 0.6, + "top_p": 0.95, + "stream": true, + "max_tokens": 131072, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "DeepSeek", + "tags": ["1.5B", "Featured"], + "size": 1290000000 + }, + "engine": "llama-cpp" +} diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-14b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-14b/model.json new file mode 100644 index 000000000..594ba6e41 --- /dev/null +++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-14b/model.json @@ -0,0 +1,35 @@ +{ + "sources": [ + { + "filename": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf", + "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf" + } + ], + "id": "deepseek-r1-distill-qwen-14b", + "object": "model", + "name": "DeepSeek R1 Distill Qwen 14B Q4", + "version": "1.0", + "description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.", + "format": "gguf", + "settings": { + "ctx_len": 131072, + "prompt_template": "<|User|> {prompt} <|Assistant|>", + "llama_model_path": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf", + "ngl": 49 + }, + "parameters": { + "temperature": 0.6, + "top_p": 0.95, + "stream": true, + "max_tokens": 131072, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "DeepSeek", + "tags": ["14B", "Featured"], + "size": 8990000000 + }, + "engine": "llama-cpp" +} diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-32b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-32b/model.json new file mode 100644 index 000000000..6d7f5accf --- /dev/null +++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-32b/model.json @@ -0,0 +1,35 @@ +{ + "sources": [ + { + "filename": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf", + "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf" + } + ], + "id": "deepseek-r1-distill-qwen-32b", + "object": "model", + "name": "DeepSeek R1 
Distill Qwen 32B Q4", + "version": "1.0", + "description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.", + "format": "gguf", + "settings": { + "ctx_len": 131072, + "prompt_template": "<|User|> {prompt} <|Assistant|>", + "llama_model_path": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf", + "ngl": 65 + }, + "parameters": { + "temperature": 0.6, + "top_p": 0.95, + "stream": true, + "max_tokens": 131072, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "DeepSeek", + "tags": ["32B", "Featured"], + "size": 19900000000 + }, + "engine": "llama-cpp" +} diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-7b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-7b/model.json new file mode 100644 index 000000000..eae53cf0e --- /dev/null +++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-7b/model.json @@ -0,0 +1,35 @@ +{ + "sources": [ + { + "filename": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf", + "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf" + } + ], + "id": "deepseek-r1-distill-qwen-7b", + "object": "model", + "name": "DeepSeek R1 Distill Qwen 7B Q5", + "version": "1.0", + "description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.", + "format": "gguf", + "settings": { + "ctx_len": 131072, + "prompt_template": "<|User|> {prompt} <|Assistant|>", + "llama_model_path": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf", + "ngl": 29 + }, + "parameters": { + "temperature": 0.6, + "top_p": 0.95, + "stream": true, + "max_tokens": 131072, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "DeepSeek", + "tags": ["7B", "Featured"], + "size": 5440000000 + }, + "engine": "llama-cpp" +} diff --git a/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json index ec9a0284b..1aeb80450 100644 --- a/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json @@ -22,19 +22,13 @@ "top_p": 0.95, "stream": true, "max_tokens": 8192, - "stop": [ - "<|end_of_text|>", - "<|eot_id|>", - "<|eom_id|>" - ], + "stop": ["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"], "frequency_penalty": 0, "presence_penalty": 0 }, "metadata": { "author": "MetaAI", - "tags": [ - "8B", "Featured" - ], + "tags": ["8B", "Featured"], "size": 4920000000 }, "engine": "llama-cpp" diff --git a/extensions/inference-cortex-extension/rolldown.config.mjs b/extensions/inference-cortex-extension/rolldown.config.mjs index 278664d3d..aebd8ac38 100644 --- a/extensions/inference-cortex-extension/rolldown.config.mjs +++ b/extensions/inference-cortex-extension/rolldown.config.mjs @@ -49,6 +49,13 @@ import qwen2514bJson from './resources/models/qwen2.5-14b-instruct/model.json' w import qwen2532bJson from './resources/models/qwen2.5-32b-instruct/model.json' with { type: 'json' } import qwen2572bJson from './resources/models/qwen2.5-72b-instruct/model.json' with { type: 'json' } +import 
deepseekR1DistillQwen_1_5b from './resources/models/deepseek-r1-distill-qwen-1.5b/model.json' with { type: 'json' } +import deepseekR1DistillQwen_7b from './resources/models/deepseek-r1-distill-qwen-7b/model.json' with { type: 'json' } +import deepseekR1DistillQwen_14b from './resources/models/deepseek-r1-distill-qwen-14b/model.json' with { type: 'json' } +import deepseekR1DistillQwen_32b from './resources/models/deepseek-r1-distill-qwen-32b/model.json' with { type: 'json' } +import deepseekR1DistillLlama_8b from './resources/models/deepseek-r1-distill-llama-8b/model.json' with { type: 'json' } +import deepseekR1DistillLlama_70b from './resources/models/deepseek-r1-distill-llama-70b/model.json' with { type: 'json' } + export default defineConfig([ { input: 'src/index.ts', @@ -106,6 +113,12 @@ export default defineConfig([ qwen2514bJson, qwen2532bJson, qwen2572bJson, + deepseekR1DistillQwen_1_5b, + deepseekR1DistillQwen_7b, + deepseekR1DistillQwen_14b, + deepseekR1DistillQwen_32b, + deepseekR1DistillLlama_8b, + deepseekR1DistillLlama_70b, ]), NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), SETTINGS: JSON.stringify(defaultSettingJson), diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index 84cc49b94..05efaf735 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -112,8 +112,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number // Run the process watchdog - const systemInfo = await systemInformation() - this.queue.add(() => executeOnMain(NODE, 'run', systemInfo)) + // const systemInfo = await systemInformation() + this.queue.add(() => executeOnMain(NODE, 'run')) this.queue.add(() => this.healthz()) this.subscribeToEvents() diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts index 420c84b6e..5222bb156 100644 --- a/extensions/inference-cortex-extension/src/node/index.ts +++ b/extensions/inference-cortex-extension/src/node/index.ts @@ -16,15 +16,20 @@ let watchdog: ProcessWatchdog | undefined = undefined * Spawns a Nitro subprocess. * @returns A promise that resolves when the Nitro subprocess is started. */ -function run(systemInfo?: SystemInformation): Promise { +function run(): Promise { log(`[CORTEX]:: Spawning cortex subprocess...`) return new Promise(async (resolve, reject) => { - let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? '' - let binaryName = `cortex-server${process.platform === 'win32' ? '.exe' : ''}` + // let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? '' + let binaryName = `cortex-server${ + process.platform === 'win32' ? 
'.exe' : '' + }` const binPath = path.join(__dirname, '..', 'bin') const executablePath = path.join(binPath, binaryName) + + addEnvPaths(binPath) + const sharedPath = path.join(appResourcePath(), 'shared') // Execute the binary log(`[CORTEX]:: Spawn cortex at path: ${executablePath}`) @@ -44,15 +49,17 @@ function run(systemInfo?: SystemInformation): Promise { `${path.join(dataFolderPath, '.janrc')}`, '--data_folder_path', dataFolderPath, + '--loglevel', + 'INFO', ], { env: { ...process.env, - CUDA_VISIBLE_DEVICES: gpuVisibleDevices, - // Vulkan - Support 1 device at a time for now - ...(gpuVisibleDevices?.length > 0 && { - GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices, - }), + // CUDA_VISIBLE_DEVICES: gpuVisibleDevices, + // // Vulkan - Support 1 device at a time for now + // ...(gpuVisibleDevices?.length > 0 && { + // GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices, + // }), }, cwd: sharedPath, } @@ -71,6 +78,22 @@ function dispose() { watchdog?.terminate() } +/** + * Set the environment paths for the cortex subprocess + * @param dest + */ +function addEnvPaths(dest: string) { + // Add engine path to the PATH and LD_LIBRARY_PATH + if (process.platform === 'win32') { + process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest) + } else { + process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat( + path.delimiter, + dest + ) + } +} + /** * Cortex process info */ diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts index 719671cfd..105f7ad91 100644 --- a/extensions/model-extension/src/index.ts +++ b/extensions/model-extension/src/index.ts @@ -15,8 +15,6 @@ import { } from '@janhq/core' import { CortexAPI } from './cortex' import { scanModelsFolder } from './legacy/model-json' -import { downloadModel } from './legacy/download' -import { systemInformation } from '@janhq/core' import { deleteModelFiles } from './legacy/delete' export enum Settings { @@ -71,18 +69,6 @@ export default class JanModelExtension extends ModelExtension { * @returns A Promise that resolves when the model is downloaded. */ async pullModel(model: string, id?: string, name?: string): Promise { - if (id) { - const model: Model = ModelManager.instance().get(id) - // Clip vision model - should not be handled by cortex.cpp - // TensorRT model - should not be handled by cortex.cpp - if ( - model && - (model.engine === InferenceEngine.nitro_tensorrt_llm || - model.settings.vision_model) - ) { - return downloadModel(model, (await systemInformation()).gpuSetting) - } - } /** * Sending POST to /models/pull/{id} endpoint to pull the model */ diff --git a/extensions/model-extension/src/legacy/download.ts b/extensions/model-extension/src/legacy/download.ts index d4d6c62d9..570d0cd13 100644 --- a/extensions/model-extension/src/legacy/download.ts +++ b/extensions/model-extension/src/legacy/download.ts @@ -2,15 +2,12 @@ import { downloadFile, DownloadRequest, fs, - GpuSetting, - InferenceEngine, joinPath, Model, } from '@janhq/core' export const downloadModel = async ( model: Model, - gpuSettings?: GpuSetting, network?: { ignoreSSL?: boolean; proxy?: string } ): Promise => { const homedir = 'file://models' @@ -27,41 +24,6 @@ export const downloadModel = async ( JSON.stringify(model, null, 2) ) - if (model.engine === InferenceEngine.nitro_tensorrt_llm) { - if (!gpuSettings || gpuSettings.gpus.length === 0) { - console.error('No GPU found. 
Please check your GPU setting.') - return - } - const firstGpu = gpuSettings.gpus[0] - if (!firstGpu.name.toLowerCase().includes('nvidia')) { - console.error('No Nvidia GPU found. Please check your GPU setting.') - return - } - const gpuArch = firstGpu.arch - if (gpuArch === undefined) { - console.error('No GPU architecture found. Please check your GPU setting.') - return - } - - if (!supportedGpuArch.includes(gpuArch)) { - console.debug( - `Your GPU: ${JSON.stringify(firstGpu)} is not supported. Only 30xx, 40xx series are supported.` - ) - return - } - - const os = 'windows' // TODO: remove this hard coded value - - const newSources = model.sources.map((source) => { - const newSource = { ...source } - newSource.url = newSource.url - .replace(//g, os) - .replace(//g, gpuArch) - return newSource - }) - model.sources = newSources - } - console.debug(`Download sources: ${JSON.stringify(model.sources)}`) if (model.sources.length > 1) { diff --git a/extensions/monitoring-extension/README.md b/extensions/monitoring-extension/README.md deleted file mode 100644 index f9690da09..000000000 --- a/extensions/monitoring-extension/README.md +++ /dev/null @@ -1,75 +0,0 @@ -# Create a Jan Extension using Typescript - -Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀 - -## Create Your Own Extension - -To create your own extension, you can use this repository as a template! Just follow the below instructions: - -1. Click the Use this template button at the top of the repository -2. Select Create a new repository -3. Select an owner and name for your new repository -4. Click Create repository -5. Clone your new repository - -## Initial Setup - -After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension. - -> [!NOTE] -> -> You'll need to have a reasonably modern version of -> [Node.js](https://nodejs.org) handy. If you are using a version manager like -> [`nodenv`](https://github.com/nodenv/nodenv) or -> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the -> root of your repository to install the version specified in -> [`package.json`](./package.json). Otherwise, 20.x or later should work! - -1. :hammer_and_wrench: Install the dependencies - - ```bash - npm install - ``` - -1. :building_construction: Package the TypeScript for distribution - - ```bash - npm run bundle - ``` - -1. :white_check_mark: Check your artifact - - There will be a tgz file in your extension directory now - -## Update the Extension Metadata - -The [`package.json`](package.json) file defines metadata about your extension, such as -extension name, main entry, description and version. - -When you copy this repository, update `package.json` with the name, description for your extension. - -## Update the Extension Code - -The [`src/`](./src/) directory is the heart of your extension! This contains the -source code that will be run when your extension functions are invoked. You can replace the -contents of this directory with your own code. - -There are a few things to keep in mind when writing your extension code: - -- Most Jan Extension functions are processed asynchronously. - In `index.ts`, you will see that the extension function will return a `Promise`. 
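Taken together, the changes above move download and device logic behind cortex.cpp: the spawn path adds `--loglevel INFO` and no longer sets `CUDA_VISIBLE_DEVICES`/`GGML_VK_VISIBLE_DEVICES` (device selection is left to cortex-server), the new `addEnvPaths` helper makes the bundled libraries in `bin` resolvable through `PATH` on Windows and `LD_LIBRARY_PATH` elsewhere, and the TensorRT-LLM and clip-vision special cases are deleted so every `pullModel` call delegates to the cortex pull endpoint. A minimal sketch of that delegation, where the base URL, port, and request shape are assumptions based on the `/models/pull/{id}` comment in the diff rather than a verified API surface:

```typescript
// Hypothetical stand-in for the deleted legacy branch: every model download
// is now a single request to the local cortex server.
const CORTEX_BASE_URL = 'http://127.0.0.1:39281' // assumed local port

async function pullViaCortex(modelId: string): Promise<void> {
  const res = await fetch(`${CORTEX_BASE_URL}/v1/models/pull`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model: modelId }),
  })
  if (!res.ok) throw new Error(`Model pull failed with status ${res.status}`)
}
```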
- - ```typescript - import { events, MessageEvent, MessageRequest } from '@janhq/core' - - function onStart(): Promise { - return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => - this.inference(data) - ) - } - ``` - - For more information about the Jan Extension Core module, see the - [documentation](https://github.com/janhq/jan/blob/main/core/README.md). - -So, what are you waiting for? Go ahead and start customizing your extension! diff --git a/extensions/monitoring-extension/bin/.gitkeep b/extensions/monitoring-extension/bin/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/extensions/monitoring-extension/download.bat b/extensions/monitoring-extension/download.bat deleted file mode 100644 index 14e0aadd9..000000000 --- a/extensions/monitoring-extension/download.bat +++ /dev/null @@ -1,2 +0,0 @@ -@echo off -.\node_modules\.bin\download https://catalog.jan.ai/vulkaninfoSDK.exe -o ./bin \ No newline at end of file diff --git a/extensions/monitoring-extension/package.json b/extensions/monitoring-extension/package.json deleted file mode 100644 index 2f827b41b..000000000 --- a/extensions/monitoring-extension/package.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "name": "@janhq/monitoring-extension", - "productName": "System Monitoring", - "version": "1.0.10", - "description": "Provides system health and OS level data.", - "main": "dist/index.js", - "node": "dist/node/index.cjs.js", - "author": "Jan ", - "license": "AGPL-3.0", - "scripts": { - "build": "rolldown -c rolldown.config.mjs && yarn download-artifacts", - "download-artifacts": "run-script-os && cpx \"bin/**\" \"dist/bin\"", - "download-artifacts:darwin": "echo 'No artifacts to download for darwin'", - "download-artifacts:win32": "download.bat", - "download-artifacts:linux": "download https://catalog.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo", - "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install" - }, - "exports": { - ".": "./dist/index.js", - "./main": "./dist/node/index.cjs.js" - }, - "devDependencies": { - "@types/node": "^20.11.4", - "@types/node-os-utils": "^1.3.4", - "cpx": "^1.5.0", - "download-cli": "^1.1.1", - "rimraf": "^3.0.2", - "rolldown": "1.0.0-beta.1", - "run-script-os": "^1.1.6", - "typescript": "^5.3.3" - }, - "dependencies": { - "@janhq/core": "../../core/package.tgz", - "node-os-utils": "^1.3.7" - }, - "files": [ - "dist/*", - "package.json", - "README.md" - ], - "bundleDependencies": [ - "node-os-utils", - "@janhq/core" - ], - "installConfig": { - "hoistingLimits": "workspaces" - }, - "packageManager": "yarn@4.5.3" -} diff --git a/extensions/monitoring-extension/resources/settings.json b/extensions/monitoring-extension/resources/settings.json deleted file mode 100644 index 40b0b97f9..000000000 --- a/extensions/monitoring-extension/resources/settings.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - { - "key": "log-enabled", - "title": "Enable App Logs", - "description": "Saves app logs locally on your computer. This enables you to send us crash reports.", - "controllerType": "checkbox", - "controllerProps": { - "value": true - } - }, - { - "key": "log-cleaning-interval", - "title": "Log Cleaning Interval", - "description": "Automatically delete local logs after a certain time interval (in milliseconds).", - "controllerType": "input", - "controllerProps": { - "value": "120000", - "placeholder": "Interval in milliseconds. E.g. 
120000", - "textAlign": "right" - } - } -] \ No newline at end of file diff --git a/extensions/monitoring-extension/rolldown.config.mjs b/extensions/monitoring-extension/rolldown.config.mjs deleted file mode 100644 index 3533e052b..000000000 --- a/extensions/monitoring-extension/rolldown.config.mjs +++ /dev/null @@ -1,32 +0,0 @@ -import { defineConfig } from 'rolldown' -import packageJson from './package.json' with { type: 'json' } -import settingJson from './resources/settings.json' with { type: 'json' } - -export default defineConfig([ - { - input: 'src/index.ts', - output: { - format: 'esm', - file: 'dist/index.js', - }, - platform: 'browser', - define: { - NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), - SETTINGS: JSON.stringify(settingJson), - }, - }, - { - input: 'src/node/index.ts', - external: ['@janhq/core/node'], - output: { - format: 'cjs', - file: 'dist/node/index.cjs.js', - sourcemap: false, - inlineDynamicImports: true, - }, - resolve: { - extensions: ['.js', '.ts', '.json'], - }, - platform: 'node', - }, -]) diff --git a/extensions/monitoring-extension/src/@types/global.d.ts b/extensions/monitoring-extension/src/@types/global.d.ts deleted file mode 100644 index 7536fabd8..000000000 --- a/extensions/monitoring-extension/src/@types/global.d.ts +++ /dev/null @@ -1,19 +0,0 @@ -declare const NODE: string -declare const SETTINGS: SettingComponentProps[] - -type CpuGpuInfo = { - cpu: { - usage: number - } - gpu: GpuInfo[] -} - -type GpuInfo = { - id: string - name: string - temperature: string - utilization: string - memoryTotal: string - memoryFree: string - memoryUtilization: string -} diff --git a/extensions/monitoring-extension/src/index.ts b/extensions/monitoring-extension/src/index.ts deleted file mode 100644 index 5616c70a8..000000000 --- a/extensions/monitoring-extension/src/index.ts +++ /dev/null @@ -1,90 +0,0 @@ -import { - AppConfigurationEventName, - GpuSetting, - MonitoringExtension, - OperatingSystemInfo, - events, - executeOnMain, -} from '@janhq/core' - -enum Settings { - logEnabled = 'log-enabled', - logCleaningInterval = 'log-cleaning-interval', -} -/** - * JanMonitoringExtension is a extension that provides system monitoring functionality. - * It implements the MonitoringExtension interface from the @janhq/core package. - */ -export default class JanMonitoringExtension extends MonitoringExtension { - /** - * Called when the extension is loaded. - */ - async onLoad() { - // Register extension settings - this.registerSettings(SETTINGS) - - const logEnabled = await this.getSetting(Settings.logEnabled, true) - const logCleaningInterval = parseInt( - await this.getSetting(Settings.logCleaningInterval, '120000') - ) - // Register File Logger provided by this extension - await executeOnMain(NODE, 'registerLogger', { - logEnabled, - logCleaningInterval: isNaN(logCleaningInterval) - ? 120000 - : logCleaningInterval, - }) - - // Attempt to fetch nvidia info - await executeOnMain(NODE, 'updateNvidiaInfo') - events.emit(AppConfigurationEventName.OnConfigurationUpdate, {}) - } - - onSettingUpdate(key: string, value: T): void { - if (key === Settings.logEnabled) { - executeOnMain(NODE, 'updateLogger', { logEnabled: value }) - } else if (key === Settings.logCleaningInterval) { - executeOnMain(NODE, 'updateLogger', { logCleaningInterval: value }) - } - } - - /** - * Called when the extension is unloaded. 
- */ - onUnload(): void { - // Register File Logger provided by this extension - executeOnMain(NODE, 'unregisterLogger') - } - - /** - * Returns the GPU configuration. - * @returns A Promise that resolves to an object containing the GPU configuration. - */ - async getGpuSetting(): Promise { - return executeOnMain(NODE, 'getGpuConfig') - } - - /** - * Returns information about the system resources. - * @returns A Promise that resolves to an object containing information about the system resources. - */ - getResourcesInfo(): Promise { - return executeOnMain(NODE, 'getResourcesInfo') - } - - /** - * Returns information about the current system load. - * @returns A Promise that resolves to an object containing information about the current system load. - */ - getCurrentLoad(): Promise { - return executeOnMain(NODE, 'getCurrentLoad') - } - - /** - * Returns information about the OS - * @returns - */ - getOsInfo(): Promise { - return executeOnMain(NODE, 'getOsInfo') - } -} diff --git a/extensions/monitoring-extension/src/node/index.ts b/extensions/monitoring-extension/src/node/index.ts deleted file mode 100644 index e32f85082..000000000 --- a/extensions/monitoring-extension/src/node/index.ts +++ /dev/null @@ -1,389 +0,0 @@ -import { - GpuSetting, - GpuSettingInfo, - LoggerManager, - OperatingSystemInfo, - ResourceInfo, - SupportedPlatforms, - getJanDataFolderPath, - log, -} from '@janhq/core/node' -import { mem, cpu } from 'node-os-utils' -import { exec } from 'child_process' -import { writeFileSync, existsSync, readFileSync, mkdirSync } from 'fs' -import path from 'path' -import os from 'os' -import { FileLogger } from './logger' - -/** - * Path to the settings directory - **/ -export const SETTINGS_DIR = path.join(getJanDataFolderPath(), 'settings') -/** - * Path to the settings file - **/ -export const GPU_INFO_FILE = path.join(SETTINGS_DIR, 'settings.json') - -/** - * Default GPU settings - * TODO: This needs to be refactored to support multiple accelerators - **/ -const DEFAULT_SETTINGS: GpuSetting = { - notify: true, - run_mode: 'cpu', - nvidia_driver: { - exist: false, - version: '', - }, - cuda: { - exist: false, - version: '', - }, - gpus: [], - gpu_highest_vram: '', - gpus_in_use: [], - is_initial: true, - // TODO: This needs to be set based on user toggle in settings - vulkan: false, -} - -export const getGpuConfig = async (): Promise => { - if (process.platform === 'darwin') return undefined - if (existsSync(GPU_INFO_FILE)) - return JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8')) - return DEFAULT_SETTINGS -} - -export const getResourcesInfo = async (): Promise => { - const ramUsedInfo = await mem.used() - const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024 - const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024 - - const resourceInfo: ResourceInfo = { - mem: { - totalMemory, - usedMemory, - }, - } - - return resourceInfo -} - -export const getCurrentLoad = () => - new Promise(async (resolve, reject) => { - const cpuPercentage = await cpu.usage() - let data = { - run_mode: 'cpu', - gpus_in_use: [], - } - - if (process.platform !== 'darwin') { - data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8')) - } - - if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) { - const gpuIds = data.gpus_in_use.join(',') - if (gpuIds !== '' && data['vulkan'] !== true) { - exec( - `nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`, - (error, stdout, _) => { - if (error) { - 
console.error(`exec error: ${error}`) - throw new Error(error.message) - } - const gpuInfo: GpuInfo[] = stdout - .trim() - .split('\n') - .map((line) => { - const [ - id, - name, - temperature, - utilization, - memoryTotal, - memoryFree, - memoryUtilization, - ] = line.split(', ').map((item) => item.replace(/\r/g, '')) - return { - id, - name, - temperature, - utilization, - memoryTotal, - memoryFree, - memoryUtilization, - } - }) - - resolve({ - cpu: { usage: cpuPercentage }, - gpu: gpuInfo, - }) - } - ) - } else { - // Handle the case where gpuIds is empty - resolve({ - cpu: { usage: cpuPercentage }, - gpu: [], - }) - } - } else { - // Handle the case where run_mode is not 'gpu' or no GPUs are in use - resolve({ - cpu: { usage: cpuPercentage }, - gpu: [], - }) - } - }) - -/** - * This will retrieve GPU information and persist settings.json - * Will be called when the extension is loaded to turn on GPU acceleration if supported - */ -export const updateNvidiaInfo = async () => { - // ignore if macos - if (process.platform === 'darwin') return - - try { - JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8')) - } catch (error) { - if (!existsSync(SETTINGS_DIR)) { - mkdirSync(SETTINGS_DIR, { - recursive: true, - }) - } - writeFileSync(GPU_INFO_FILE, JSON.stringify(DEFAULT_SETTINGS, null, 2)) - } - - await updateNvidiaDriverInfo() - await updateGpuInfo() -} - -const updateNvidiaDriverInfo = async () => - new Promise((resolve, reject) => { - exec( - 'nvidia-smi --query-gpu=driver_version --format=csv,noheader', - (error, stdout) => { - const data: GpuSetting = JSON.parse( - readFileSync(GPU_INFO_FILE, 'utf-8') - ) - - if (!error) { - const firstLine = stdout.split('\n')[0].trim() - data.nvidia_driver.exist = true - data.nvidia_driver.version = firstLine - } else { - data.nvidia_driver.exist = false - } - - writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2)) - resolve({}) - } - ) - }) - -const getGpuArch = (gpuName: string): string => { - if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown' - - if (gpuName.includes('30')) return 'ampere' - else if (gpuName.includes('40')) return 'ada' - else return 'unknown' -} - -const updateGpuInfo = async () => - new Promise((resolve, reject) => { - let data: GpuSetting = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8')) - - // Cuda - if (data.vulkan === true) { - // Vulkan - exec( - process.platform === 'win32' - ? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary` - : `${__dirname}/../bin/vulkaninfo --summary`, - async (error, stdout) => { - if (!error) { - const output = stdout.toString() - - log(output) - const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g - - const gpus: GpuSettingInfo[] = [] - let match - while ((match = gpuRegex.exec(output)) !== null) { - const id = match[1] - const name = match[2] - const arch = getGpuArch(name) - gpus.push({ id, vram: '0', name, arch }) - } - data.gpus = gpus - - if (!data.gpus_in_use || data.gpus_in_use.length === 0) { - data.gpus_in_use = [data.gpus.length > 1 ? 
'1' : '0'] - } - - data = await updateCudaExistence(data) - writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2)) - log(`[APP]::${JSON.stringify(data)}`) - resolve({}) - } else { - reject(error) - } - } - ) - } else { - exec( - 'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits', - async (error, stdout) => { - if (!error) { - log(`[SPECS]::${stdout}`) - // Get GPU info and gpu has higher memory first - let highestVram = 0 - let highestVramId = '0' - const gpus: GpuSettingInfo[] = stdout - .trim() - .split('\n') - .map((line) => { - let [id, vram, name] = line.split(', ') - const arch = getGpuArch(name) - vram = vram.replace(/\r/g, '') - if (parseFloat(vram) > highestVram) { - highestVram = parseFloat(vram) - highestVramId = id - } - return { id, vram, name, arch } - }) - - data.gpus = gpus - data.gpu_highest_vram = highestVramId - } else { - data.gpus = [] - data.gpu_highest_vram = undefined - } - - if (!data.gpus_in_use || data.gpus_in_use.length === 0) { - data.gpus_in_use = data.gpu_highest_vram ? [data.gpu_highest_vram].filter(e => !!e) : [] - } - - data = await updateCudaExistence(data) - console.log('[MONITORING]::Cuda info: ', data) - writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2)) - log(`[APP]::${JSON.stringify(data)}`) - resolve({}) - } - ) - } - }) - -/** - * Check if file exists in paths - */ -const checkFileExistenceInPaths = (file: string, paths: string[]): boolean => { - return paths.some((p) => existsSync(path.join(p, file))) -} - -/** - * Validate cuda for linux and windows - */ -const updateCudaExistence = async ( - data: GpuSetting = DEFAULT_SETTINGS -): Promise => { - let filesCuda12: string[] - let filesCuda11: string[] - let paths: string[] - let cudaVersion: string = '' - - if (process.platform === 'win32') { - filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll'] - filesCuda11 = ['cublas64_11.dll', 'cudart64_110.dll', 'cublasLt64_11.dll'] - paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : [] - } else { - filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12'] - filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11'] - paths = process.env.LD_LIBRARY_PATH - ? 
process.env.LD_LIBRARY_PATH.split(path.delimiter) - : [] - paths.push('/usr/lib/x86_64-linux-gnu/') - } - - let cudaExists = filesCuda12.every( - (file) => existsSync(file) || checkFileExistenceInPaths(file, paths) - ) - - if (!cudaExists) { - cudaExists = filesCuda11.every( - (file) => existsSync(file) || checkFileExistenceInPaths(file, paths) - ) - if (cudaExists) { - cudaVersion = '11' - } - } else { - cudaVersion = '12' - } - - data.cuda.exist = cudaExists - data.cuda.version = cudaVersion - - console.debug(data.is_initial, data.gpus_in_use) - - if (cudaExists && data.is_initial && data.gpus_in_use.length > 0) { - data.run_mode = 'gpu' - } - - data.is_initial = false - - // Attempt to query CUDA using NVIDIA SMI - if (!cudaExists) { - await new Promise((resolve) => { - exec('nvidia-smi', (error, stdout) => { - if (!error) { - const regex = /CUDA\s*Version:\s*(\d+\.\d+)/g - const match = regex.exec(stdout) - if (match && match[1]) { - data.cuda.version = match[1] - } - } - console.log('[MONITORING]::Finalized cuda info update: ', data) - resolve() - }) - }) - } - return data -} - -export const getOsInfo = (): OperatingSystemInfo => { - const platform = - SupportedPlatforms.find((p) => p === process.platform) || 'unknown' - - const osInfo: OperatingSystemInfo = { - platform: platform, - arch: process.arch, - release: os.release(), - machine: os.machine(), - version: os.version(), - totalMem: os.totalmem(), - freeMem: os.freemem(), - } - - return osInfo -} - -export const registerLogger = ({ logEnabled, logCleaningInterval }) => { - const logger = new FileLogger(logEnabled, logCleaningInterval) - LoggerManager.instance().register(logger) - logger.cleanLogs() -} - -export const unregisterLogger = () => { - LoggerManager.instance().unregister('file') -} - -export const updateLogger = ({ logEnabled, logCleaningInterval }) => { - const logger = LoggerManager.instance().loggers.get('file') as FileLogger - if (logger && logEnabled !== undefined) logger.logEnabled = logEnabled - if (logger && logCleaningInterval) - logger.logCleaningInterval = logCleaningInterval - // Rerun - logger && logger.cleanLogs() -} diff --git a/server/cortex.json b/server/cortex.json index 917cff354..0a9b83efd 100644 --- a/server/cortex.json +++ b/server/cortex.json @@ -5,77 +5,470 @@ "post": { "operationId": "AssistantsController_create", "summary": "Create assistant", - "description": "Creates a new assistant.", - "parameters": [], + "description": "Creates a new assistant with the specified configuration.", "requestBody": { "required": true, "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/CreateAssistantDto" + "type": "object", + "properties": { + "model": { + "type": "string", + "description": "The model identifier to use for the assistant." + }, + "name": { + "type": "string", + "description": "The name of the assistant." + }, + "description": { + "type": "string", + "description": "The description of the assistant." + }, + "instructions": { + "type": "string", + "description": "Instructions for the assistant's behavior." + }, + "tools": { + "type": "array", + "description": "A list of tools enabled on the assistant. 
Maximum of 128 tools.", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "code_interpreter", + "file_search", + "function" + ] + } + } + } + }, + "tool_resources": { + "type": "object", + "description": "Resources used by the assistant's tools.", + "properties": { + "code_interpreter": { + "type": "object" + }, + "file_search": { + "type": "object" + } + } + }, + "metadata": { + "type": "object", + "description": "Set of key-value pairs for the assistant.", + "additionalProperties": true + }, + "temperature": { + "type": "number", + "format": "float", + "description": "Temperature parameter for response generation." + }, + "top_p": { + "type": "number", + "format": "float", + "description": "Top p parameter for response generation." + }, + "response_format": { + "oneOf": [ + { + "type": "string", + "enum": ["auto"] + }, + { + "type": "object" + } + ] + } + }, + "required": ["model"] } } } }, - "responses": { - "201": { - "description": "The assistant has been successfully created." - } - }, - "tags": ["Assistants"] - }, - "get": { - "operationId": "AssistantsController_findAll", - "summary": "List assistants", - "description": "Returns a list of assistants.", - "parameters": [ - { - "name": "limit", - "required": false, - "in": "query", - "description": "A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.", - "schema": { - "type": "number" - } - }, - { - "name": "order", - "required": false, - "in": "query", - "description": "Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order.", - "schema": { - "type": "string" - } - }, - { - "name": "after", - "required": false, - "in": "query", - "description": "A cursor for use in pagination. after is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.", - "schema": { - "type": "string" - } - }, - { - "name": "before", - "required": false, - "in": "query", - "description": "A cursor for use in pagination. before is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list.", - "schema": { - "type": "string" - } - } - ], "responses": { "200": { "description": "Ok", "content": { "application/json": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/AssistantEntity" + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The unique identifier of the assistant." + }, + "object": { + "type": "string", + "enum": ["assistant"], + "description": "The object type, which is always 'assistant'." + }, + "created_at": { + "type": "integer", + "description": "Unix timestamp (in seconds) of when the assistant was created." + }, + "model": { + "type": "string", + "description": "The model identifier used by the assistant." + }, + "name": { + "type": "string", + "description": "The name of the assistant." + }, + "description": { + "type": "string", + "description": "The description of the assistant." + }, + "instructions": { + "type": "string", + "description": "Instructions for the assistant's behavior." 
+ }, + "tools": { + "type": "array", + "description": "A list of tools enabled on the assistant.", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "code_interpreter", + "file_search", + "function" + ] + } + } + } + }, + "tool_resources": { + "type": "object", + "description": "Resources used by the assistant's tools.", + "properties": { + "code_interpreter": { + "type": "object" + }, + "file_search": { + "type": "object" + } + } + }, + "metadata": { + "type": "object", + "description": "Set of key-value pairs that can be attached to the assistant.", + "additionalProperties": true + }, + "temperature": { + "type": "number", + "format": "float", + "description": "Temperature parameter for response generation." + }, + "top_p": { + "type": "number", + "format": "float", + "description": "Top p parameter for response generation." + }, + "response_format": { + "oneOf": [ + { + "type": "string", + "enum": ["auto"] + }, + { + "type": "object" + } + ] + } + }, + "required": [ + "id", + "object", + "created_at", + "model", + "metadata" + ] + } + } + } + } + }, + "tags": ["Assistants"] + }, + "patch": { + "operationId": "AssistantsController_update", + "summary": "Update assistant", + "description": "Updates an assistant. Requires at least one modifiable field.", + "parameters": [ + { + "name": "id", + "required": true, + "in": "path", + "description": "The unique identifier of the assistant.", + "schema": { + "type": "string" + } + }, + { + "name": "OpenAI-Beta", + "required": true, + "in": "header", + "description": "Beta feature header.", + "schema": { + "type": "string", + "enum": ["assistants=v2"] + } + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "model": { + "type": "string", + "description": "The model identifier to use for the assistant." + }, + "name": { + "type": "string", + "description": "The name of the assistant." + }, + "description": { + "type": "string", + "description": "The description of the assistant." + }, + "instructions": { + "type": "string", + "description": "Instructions for the assistant's behavior." + }, + "tools": { + "type": "array", + "description": "A list of tools enabled on the assistant. Maximum of 128 tools.", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "code_interpreter", + "file_search", + "function" + ] + } + } + } + }, + "tool_resources": { + "type": "object", + "description": "Resources used by the assistant's tools.", + "properties": { + "code_interpreter": { + "type": "object" + }, + "file_search": { + "type": "object" + } + } + }, + "metadata": { + "type": "object", + "description": "Set of key-value pairs for the assistant.", + "additionalProperties": true + }, + "temperature": { + "type": "number", + "format": "float", + "description": "Temperature parameter for response generation." + }, + "top_p": { + "type": "number", + "format": "float", + "description": "Top p parameter for response generation." + }, + "response_format": { + "oneOf": [ + { + "type": "string", + "enum": ["auto"] + }, + { + "type": "object" + } + ] } + }, + "minProperties": 1 + } + } + } + }, + "responses": { + "200": { + "description": "Ok", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The unique identifier of the assistant." 
+ }, + "object": { + "type": "string", + "enum": ["assistant"], + "description": "The object type, which is always 'assistant'." + }, + "created_at": { + "type": "integer", + "description": "Unix timestamp (in seconds) of when the assistant was created." + }, + "model": { + "type": "string", + "description": "The model identifier used by the assistant." + }, + "name": { + "type": "string", + "description": "The name of the assistant." + }, + "description": { + "type": "string", + "description": "The description of the assistant." + }, + "instructions": { + "type": "string", + "description": "Instructions for the assistant's behavior." + }, + "tools": { + "type": "array", + "description": "A list of tools enabled on the assistant.", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "code_interpreter", + "file_search", + "function" + ] + } + } + } + }, + "tool_resources": { + "type": "object", + "description": "Resources used by the assistant's tools.", + "properties": { + "code_interpreter": { + "type": "object" + }, + "file_search": { + "type": "object" + } + } + }, + "metadata": { + "type": "object", + "description": "Set of key-value pairs that can be attached to the assistant.", + "additionalProperties": true + }, + "temperature": { + "type": "number", + "format": "float", + "description": "Temperature parameter for response generation." + }, + "top_p": { + "type": "number", + "format": "float", + "description": "Top p parameter for response generation." + }, + "response_format": { + "oneOf": [ + { + "type": "string", + "enum": ["auto"] + }, + { + "type": "object" + } + ] + } + }, + "required": [ + "id", + "object", + "created_at", + "model", + "metadata" + ] + } + } + } + } + }, + "tags": ["Assistants"] + }, + "get": { + "operationId": "AssistantsController_list", + "summary": "List assistants", + "description": "Returns a list of assistants.", + "responses": { + "200": { + "description": "Ok", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "object": { + "type": "string", + "enum": ["list"], + "description": "The object type, which is always 'list' for a list response." + }, + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The unique identifier of the assistant." + }, + "object": { + "type": "string", + "enum": ["assistant"], + "description": "The object type, which is always 'assistant'." + }, + "created_at": { + "type": "integer", + "description": "Unix timestamp (in seconds) of when the assistant was created." + }, + "model": { + "type": "string", + "description": "The model identifier used by the assistant." 
+ }, + "metadata": { + "type": "object", + "description": "Set of key-value pairs that can be attached to the assistant.", + "additionalProperties": true + } + }, + "required": [ + "id", + "object", + "created_at", + "model", + "metadata" + ] + } + } + }, + "required": ["object", "data"] } } } @@ -88,7 +481,77 @@ "get": { "operationId": "AssistantsController_findOne", "summary": "Get assistant", - "description": "Retrieves a specific assistant defined by an assistant's `id`.", + "description": "Retrieves a specific assistant by ID.", + "parameters": [ + { + "name": "id", + "required": true, + "in": "path", + "description": "The unique identifier of the assistant.", + "schema": { + "type": "string" + } + }, + { + "name": "OpenAI-Beta", + "required": true, + "in": "header", + "description": "Beta feature header.", + "schema": { + "type": "string", + "enum": ["assistants=v2"] + } + } + ], + "responses": { + "200": { + "description": "Ok", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The unique identifier of the assistant." + }, + "object": { + "type": "string", + "enum": ["assistant"], + "description": "The object type, which is always 'assistant'." + }, + "created_at": { + "type": "integer", + "description": "Unix timestamp (in seconds) of when the assistant was created." + }, + "model": { + "type": "string", + "description": "The model identifier used by the assistant." + }, + "metadata": { + "type": "object", + "description": "Set of key-value pairs attached to the assistant.", + "additionalProperties": true + } + }, + "required": [ + "id", + "object", + "created_at", + "model", + "metadata" + ] + } + } + } + } + }, + "tags": ["Assistants"] + }, + "delete": { + "operationId": "AssistantsController_remove", + "summary": "Delete assistant", + "description": "Deletes a specific assistant by ID.", "parameters": [ { "name": "id", @@ -106,36 +569,24 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/AssistantEntity" - } - } - } - } - }, - "tags": ["Assistants"] - }, - "delete": { - "operationId": "AssistantsController_remove", - "summary": "Delete assistant", - "description": "Deletes a specific assistant defined by an assistant's `id`.", - "parameters": [ - { - "name": "id", - "required": true, - "in": "path", - "description": "The unique identifier of the assistant.", - "schema": { - "type": "string" - } - } - ], - "responses": { - "200": { - "description": "The assistant has been successfully deleted.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/DeleteAssistantResponseDto" + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The unique identifier of the deleted assistant." + }, + "object": { + "type": "string", + "enum": ["assistant.deleted"], + "description": "The object type for a deleted assistant." + }, + "deleted": { + "type": "boolean", + "enum": [true], + "description": "Indicates the assistant was successfully deleted." + } + }, + "required": ["id", "object", "deleted"] } } } @@ -2199,6 +2650,84 @@ "tags": ["Engines"] } }, + "/engines/{name}/releases/{version}": { + "get": { + "summary": "List variants for a specific engine version", + "description": "Lists all available variants (builds) for a specific version of an engine. 
Variants can include different CPU architectures (AVX, AVX2, AVX512), GPU support (CUDA, Vulkan), and operating systems (Windows, Linux, macOS).", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" + }, + "description": "The type of engine" + }, + { + "name": "version", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "The version of the engine" + }, + { + "name": "show", + "in": "query", + "required": false, + "schema": { + "type": "string", + "enum": ["all", "compatible"], + "default": "all" + }, + "description": "Filter the variants list. Use 'compatible' to show only variants compatible with the current system, or 'all' to show all available variants." + } + ], + "responses": { + "200": { + "description": "Successfully retrieved variants list", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the variant, including OS, architecture, and capabilities", + "example": "linux-amd64-avx-cuda-11-7" + }, + "created_at": { + "type": "string", + "format": "date-time", + "description": "Creation timestamp of the variant", + "example": "2024-11-13T04:51:16Z" + }, + "size": { + "type": "integer", + "description": "Size of the variant in bytes", + "example": 151224604 + }, + "download_count": { + "type": "integer", + "description": "Number of times this variant has been downloaded", + "example": 0 + } + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, "/engines/{name}/releases/latest": { "get": { "summary": "Get latest release", @@ -2314,7 +2843,7 @@ "get_models_url": { "type": "string", "description": "The URL to get models", - "example": "https://api.openai.com/v1/models" + "example": "https://api.openai.com/models" } } } @@ -3378,6 +3907,7 @@ "Files", "Hardware", "Events", + "Assistants", "Threads", "Messages", "Pulling Models", @@ -4858,8 +5388,8 @@ "engine", "version", "inference_params", - "TransformReq", - "TransformResp", + "transform_req", + "transform_resp", "metadata" ], "properties": { @@ -4867,9 +5397,9 @@ "type": "string", "description": "The identifier of the model." }, - "api_key_template": { + "header_template": { "type": "string", - "description": "Template for the API key header." + "description": "Template for the header." }, "engine": { "type": "string", @@ -4902,7 +5432,7 @@ } } }, - "TransformReq": { + "transform_req": { "type": "object", "properties": { "get_models": { @@ -4924,7 +5454,7 @@ } } }, - "TransformResp": { + "transform_resp": { "type": "object", "properties": { "chat_completions": { @@ -5632,9 +6162,9 @@ "description": "Number of GPU layers.", "example": 33 }, - "api_key_template": { + "header_template": { "type": "string", - "description": "Template for the API key header." + "description": "Template for the header." 
}, "version": { "type": "string", diff --git a/web/containers/AutoLink/index.tsx b/web/containers/AutoLink/index.tsx index 66c84f7f7..0f10f478a 100644 --- a/web/containers/AutoLink/index.tsx +++ b/web/containers/AutoLink/index.tsx @@ -10,23 +10,25 @@ const AutoLink = ({ text }: Props) => { return ( <> - {text.split(delimiter).map((word) => { - const match = word.match(delimiter) - if (match) { - const url = match[0] - return ( - - {url} - - ) - } - return word - })} + {text && + typeof text === 'string' && + text.split(delimiter).map((word) => { + const match = word.match(delimiter) + if (match) { + const url = match[0] + return ( + + {url} + + ) + } + return word + })} ) } diff --git a/web/containers/ErrorMessage/index.tsx b/web/containers/ErrorMessage/index.tsx index cd9334283..ab5a35d32 100644 --- a/web/containers/ErrorMessage/index.tsx +++ b/web/containers/ErrorMessage/index.tsx @@ -23,7 +23,13 @@ import { mainViewStateAtom } from '@/helpers/atoms/App.atom' import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom' import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom' -const ErrorMessage = ({ message }: { message: ThreadMessage }) => { +const ErrorMessage = ({ + message, + errorComponent, +}: { + message?: ThreadMessage + errorComponent?: React.ReactNode +}) => { const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom) const setMainState = useSetAtom(mainViewStateAtom) const setSelectedSettingScreen = useSetAtom(selectedSettingAtom) @@ -50,7 +56,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => { const getErrorTitle = () => { const engine = getEngine() - switch (message.metadata?.error_code) { + switch (message?.metadata?.error_code) { case ErrorCode.InvalidApiKey: case ErrorCode.AuthenticationError: return ( @@ -61,7 +67,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => { className="font-medium text-[hsla(var(--app-link))] underline" onClick={() => { setMainState(MainViewState.Settings) - engine?.name && setSelectedSettingScreen(engine.name) + setSelectedSettingScreen(activeAssistant?.model?.engine ?? '') }} > Settings @@ -77,7 +83,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => { data-testid="passthrough-error-message" className="first-letter:uppercase" > - {message.content[0]?.text?.value === 'Failed to fetch' && + {message?.content[0]?.text?.value === 'Failed to fetch' && engine && engine?.name !== InferenceEngine.cortex_llamacpp ? ( @@ -89,6 +95,9 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => { {message?.content[0]?.text?.value && ( )} + {!message?.content[0]?.text?.value && ( + Something went wrong. Please try again. + )} )}

@@ -100,12 +109,15 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
-
Error
-
-
+
+ + Error +
+
+
setModalTroubleShooting(true)} @@ -116,7 +128,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
{copied ? ( @@ -138,10 +150,10 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
- {getErrorTitle()} + {errorComponent ? errorComponent : getErrorTitle()}
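With the remaining hunks, every read in `ErrorMessage` goes through optional chaining (`message?.metadata?.error_code`, `message?.content[0]?.text?.value`), the Settings deep link is driven by `activeAssistant?.model?.engine` instead of the resolved engine object, and callers can replace the computed title entirely. A hypothetical call site enabled by the widened props; the import path mirrors the file touched above, and the message text is invented for illustration:

```tsx
import ErrorMessage from '@/containers/ErrorMessage'

// No ThreadMessage is required any more; a custom body can be injected.
const EngineInstallFailure = () => (
  <ErrorMessage errorComponent={<span>Engine installation failed.</span>} />
)

export default EngineInstallFailure
```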
diff --git a/web/containers/Layout/BottomPanel/SystemMonitor/SystemMonitor.test.tsx b/web/containers/Layout/BottomPanel/SystemMonitor/SystemMonitor.test.tsx index 2eba5edbb..ca336b0e5 100644 --- a/web/containers/Layout/BottomPanel/SystemMonitor/SystemMonitor.test.tsx +++ b/web/containers/Layout/BottomPanel/SystemMonitor/SystemMonitor.test.tsx @@ -87,7 +87,7 @@ describe('SystemMonitor', () => { expect(screen.getByText('Running Models')).toBeInTheDocument() expect(screen.getByText('App Log')).toBeInTheDocument() - expect(screen.getByText('7.45/14.90 GB')).toBeInTheDocument() + expect(screen.getByText('7.45GB / 14.90GB')).toBeInTheDocument() expect(screen.getByText('30%')).toBeInTheDocument() }) diff --git a/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx b/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx index f47dfaeb7..d9a0b289a 100644 --- a/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx +++ b/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx @@ -134,8 +134,8 @@ const SystemMonitor = () => {
Memory
- {toGigabytes(usedRam, { hideUnit: true })}/ - {toGigabytes(totalRam, { hideUnit: true })} GB + {toGigabytes(usedRam, { hideUnit: true })}GB /{' '} + {toGigabytes(totalRam, { hideUnit: true })}GB
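The memory label change is purely presentational: the unit moves onto each operand and a spaced slash separates them, which is exactly what the updated `SystemMonitor.test.tsx` expectation `'7.45GB / 14.90GB'` asserts. As a sketch, with `toGigabytes` (from `@/utils/converter`) assumed to return a formatted number string when `hideUnit` is set:

```typescript
// before: `${used}/${total} GB`   after: `${used}GB / ${total}GB`
const memoryLabel = (usedGb: string, totalGb: string): string =>
  `${usedGb}GB / ${totalGb}GB`

memoryLabel('7.45', '14.90') // '7.45GB / 14.90GB', matching the updated test
```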
@@ -149,41 +149,43 @@ const SystemMonitor = () => {
{gpus.length > 0 && (
- {gpus.map((gpu, index) => { - const gpuUtilization = utilizedMemory( - gpu.memoryFree, - gpu.memoryTotal - ) - return ( -
-
- - {gpu.name} - -
-
- - {gpu.memoryTotal - gpu.memoryFree}/ - {gpu.memoryTotal} - - MB + {gpus + .filter((gpu) => gpu.activated === true) + .map((gpu, index) => { + const gpuUtilization = utilizedMemory( + gpu.free_vram, + gpu.total_vram + ) + return ( +
+
+ + {gpu.name} + +
+
+ + {gpu.total_vram - gpu.free_vram}/ + {gpu.total_vram} + + MB +
-
-
- - - {gpuUtilization}% - +
+ + + {gpuUtilization}% + +
-
- ) - })} + ) + })}
)}
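The GPU list now renders only devices with `activated === true` and reads the numeric `free_vram`/`total_vram` fields (in MB, per the markup) instead of the old `memoryFree`/`memoryTotal` strings, matching the hardware payload the hooks switch to later in this diff. The per-GPU math reduces to a sketch like this; `utilizedMemory` itself is defined elsewhere in the component, so this reimplementation is an assumption:

```typescript
type GpuVram = {
  name: string
  total_vram: number
  free_vram: number
  activated: boolean
}

// Percentage of VRAM in use, as the progress bar displays it.
const utilization = (gpu: GpuVram): number =>
  gpu.total_vram > 0
    ? Math.round(((gpu.total_vram - gpu.free_vram) / gpu.total_vram) * 100)
    : 0

// Only activated GPUs are shown.
const visibleGpus = (gpus: GpuVram[]): GpuVram[] =>
  gpus.filter((g) => g.activated)
```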
diff --git a/web/containers/Layout/index.tsx b/web/containers/Layout/index.tsx index 18c0edcab..d29647029 100644 --- a/web/containers/Layout/index.tsx +++ b/web/containers/Layout/index.tsx @@ -25,6 +25,8 @@ import ImportModelOptionModal from '@/screens/Settings/ImportModelOptionModal' import ImportingModelModal from '@/screens/Settings/ImportingModelModal' import SelectingModelModal from '@/screens/Settings/SelectingModelModal' +import { getAppDistinctId, updateDistinctId } from '@/utils/settings' + import LoadingModal from '../LoadingModal' import MainViewContainer from '../MainViewContainer' @@ -96,8 +98,16 @@ const BaseLayout = () => { return properties }, }) - posthog.opt_in_capturing() - posthog.register({ app_version: VERSION }) + // Attempt to restore distinct Id from app global settings + getAppDistinctId() + .then((id) => { + if (id) posthog.identify(id) + }) + .finally(() => { + posthog.opt_in_capturing() + posthog.register({ app_version: VERSION }) + updateDistinctId(posthog.get_distinct_id()) + }) } else { posthog.opt_out_capturing() } diff --git a/web/containers/ModelDropdown/index.tsx b/web/containers/ModelDropdown/index.tsx index 6d2cc0b23..a702d12f7 100644 --- a/web/containers/ModelDropdown/index.tsx +++ b/web/containers/ModelDropdown/index.tsx @@ -28,6 +28,8 @@ import ModelLabel from '@/containers/ModelLabel' import SetupRemoteModel from '@/containers/SetupRemoteModel' +import { useActiveModel } from '@/hooks/useActiveModel' + import { useCreateNewThread } from '@/hooks/useCreateNewThread' import useDownloadModel from '@/hooks/useDownloadModel' import { modelDownloadStateAtom } from '@/hooks/useDownloadState' @@ -40,7 +42,7 @@ import useUpdateModelParameters from '@/hooks/useUpdateModelParameters' import { formatDownloadPercentage, toGigabytes } from '@/utils/converter' import { manualRecommendationModel } from '@/utils/model' -import { getLogoEngine } from '@/utils/modelEngine' +import { getLogoEngine, getTitleByEngine } from '@/utils/modelEngine' import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom' import { @@ -93,6 +95,7 @@ const ModelDropdown = ({ const { updateModelParameter } = useUpdateModelParameters() const searchInputRef = useRef(null) const configuredModels = useAtomValue(configuredModelsAtom) + const { stopModel } = useActiveModel() const featuredModels = configuredModels.filter( (x) => @@ -226,6 +229,7 @@ const ModelDropdown = ({ const model = downloadedModels.find((m) => m.id === modelId) setSelectedModel(model) setOpen(false) + stopModel() if (activeThread) { // Change assistand tools based on model support RAG @@ -248,18 +252,13 @@ const ModelDropdown = ({ ], }) - const defaultContextLength = Math.min( - 8192, - model?.settings.ctx_len ?? 8192 - ) - + const contextLength = model?.settings.ctx_len + ? Math.min(8192, model?.settings.ctx_len ?? 8192) + : undefined const overriddenParameters = { - ctx_len: model?.settings.ctx_len ? defaultContextLength : undefined, - max_tokens: defaultContextLength - ? Math.min( - model?.parameters.max_tokens ?? 8192, - defaultContextLength - ) + ctx_len: contextLength, + max_tokens: contextLength + ? Math.min(model?.parameters.max_tokens ?? 8192, contextLength) : model?.parameters.max_tokens, } @@ -289,6 +288,7 @@ const ModelDropdown = ({ updateThreadMetadata, setThreadModelParams, updateModelParameter, + stopModel, ] ) @@ -429,7 +429,7 @@ const ModelDropdown = ({ /> )}
- {engine.name} + {getTitleByEngine(engine.name)}
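In `Layout`, analytics identity is now restored before capture is enabled: a persisted distinct id is read from app settings, handed to `posthog.identify`, and PostHog's resulting id is written back, so the identity survives app-data resets (`useFactoryReset` later in this diff carries `distinct_id` across a factory reset for the same reason). The helper implementations are not shown in the diff; inferred from their call sites, their signatures would be roughly:

```typescript
// Presumed signatures for the new helpers in '@/utils/settings'.
declare function getAppDistinctId(): Promise<string | undefined>
declare function updateDistinctId(distinctId: string): Promise<void>
```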
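In `ModelDropdown`, selecting a model now also calls `stopModel()`, presumably so the previously loaded model is unloaded before the new selection starts, and the engine group header shows a display title via `getTitleByEngine` instead of the raw engine id. The context-length override also changes meaning: instead of always computing a default, it stays `undefined` unless the model actually declares `ctx_len`. A self-contained sketch of the new override logic; the `ModelLike` type is illustrative:

```typescript
type ModelLike = {
  settings: { ctx_len?: number }
  parameters: { max_tokens?: number }
}

// Clamp to 8192 only when the model declares a context length; otherwise
// leave both values up to the model. The same pattern appears in
// useCreateNewThread further down in this diff.
function overriddenParams(model?: ModelLike) {
  const contextLength = model?.settings.ctx_len
    ? Math.min(8192, model.settings.ctx_len)
    : undefined
  return {
    ctx_len: contextLength,
    max_tokens: contextLength
      ? Math.min(model?.parameters.max_tokens ?? 8192, contextLength)
      : model?.parameters.max_tokens,
  }
}

overriddenParams({ settings: { ctx_len: 131072 }, parameters: { max_tokens: 131072 } })
// → { ctx_len: 8192, max_tokens: 8192 }
overriddenParams({ settings: {}, parameters: { max_tokens: 4096 } })
// → { ctx_len: undefined, max_tokens: 4096 }
```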
@@ -475,7 +475,7 @@ const ModelDropdown = ({ >

{model.name} @@ -549,75 +549,82 @@ const ModelDropdown = ({ (c) => c.id === model.id ) return ( -

  • { - if (!isConfigured && engine.type === 'remote') - return null - if (isDownloaded) { - onClickModelItem(model.id) - } - }} - > -
    -

    + {isDownloaded && ( +

  • - {model.name} -

    - -
  • -
    - {!isDownloaded && ( - - {toGigabytes(model.metadata?.size)} - - )} - {!isDownloading && !isDownloaded ? ( - - downloadModel( - model.sources[0].url, - model.id - ) + onClick={() => { + if ( + !isConfigured && + engine.type === 'remote' + ) + return null + if (isDownloaded) { + onClickModelItem(model.id) } - /> - ) : ( - Object.values(downloadStates) - .filter((x) => x.modelId === model.id) - .map((item) => ( - +
    +

    + {model.name} +

    + +
    +
    + {!isDownloaded && ( + + {toGigabytes(model.metadata?.size)} + + )} + {!isDownloading && !isDownloaded ? ( + + downloadModel( + model.sources[0].url, + model.id + ) } - size={100} /> - )) - )} -
    - + ) : ( + Object.values(downloadStates) + .filter((x) => x.modelId === model.id) + .map((item) => ( + + )) + )} +
    + + )} + ) })} diff --git a/web/containers/ModelLabel/index.tsx b/web/containers/ModelLabel/index.tsx index 564b7edf8..c7c64b210 100644 --- a/web/containers/ModelLabel/index.tsx +++ b/web/containers/ModelLabel/index.tsx @@ -29,15 +29,20 @@ const ModelLabel = ({ size, compact }: Props) => { const { settings } = useSettings() const getLabel = (size: number) => { - const minimumRamModel = size * 1.25 - const availableRam = - settings?.run_mode === 'gpu' - ? availableVram * 1000000 // MB to bytes - : totalRam - usedRam + (activeModel?.metadata?.size ?? 0) + const minimumRamModel = (size * 1.25) / (1024 * 1024) + + const availableRam = settings?.gpus?.some((gpu) => gpu.activated) + ? availableVram * 1000000 // MB to bytes + : totalRam - + (usedRam + + (activeModel?.metadata?.size + ? (activeModel.metadata.size * 1.25) / (1024 * 1024) + : 0)) + if (minimumRamModel > totalRam) { return ( gpu.activated) ? 'VRAM' : 'RAM'} compact={compact} /> ) diff --git a/web/containers/Providers/ModelHandler.tsx b/web/containers/Providers/ModelHandler.tsx index 2c027539e..cceb88a4c 100644 --- a/web/containers/Providers/ModelHandler.tsx +++ b/web/containers/Providers/ModelHandler.tsx @@ -143,8 +143,7 @@ export default function ModelHandler() { return } - // The thread title should not be updated if the message is less than 10 words - // And no new line character is present + // No new line character is presented in the title // And non-alphanumeric characters should be removed if (messageContent.includes('\n')) { messageContent = messageContent.replace(/\n/g, ' ') diff --git a/web/containers/ServerLogs/index.tsx b/web/containers/ServerLogs/index.tsx index 2e978bd23..b89a4c237 100644 --- a/web/containers/ServerLogs/index.tsx +++ b/web/containers/ServerLogs/index.tsx @@ -93,205 +93,211 @@ const ServerLogs = (props: ServerLogsProps) => { }, [listRef.current?.scrollHeight, isUserManuallyScrollingUp, logs]) return ( - - {withCopy && ( -
    -
    - - -
    -
    - )} -
    - {logs.length > 0 ? ( - - {logs.slice(-limit).map((log, i) => { - return ( -

    - {log} -

    - ) - })} -
    - ) : ( -
    - - - - +
    + {withCopy && ( +
    +
    + + +
    )}
    - + +
    + {logs.length > 0 ? ( + + {logs.slice(-limit).map((log, i) => { + return ( +

    + {log} +

    + ) + })} +
    + ) : ( +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    + Empty logs +

    +
    + )} +
    +
    + ) } diff --git a/web/containers/SliderRightPanel/index.tsx b/web/containers/SliderRightPanel/index.tsx index 3fad10212..5022845c9 100644 --- a/web/containers/SliderRightPanel/index.tsx +++ b/web/containers/SliderRightPanel/index.tsx @@ -73,7 +73,7 @@ const SliderRightPanel = ({ trigger={ (MainViewState.Thread) export const defaultJanDataFolderAtom = atom('') +export const LocalEngineDefaultVariantAtom = atom('') + const SHOW_RIGHT_PANEL = 'showRightPanel' // Store panel atom diff --git a/web/hooks/useCreateNewThread.ts b/web/hooks/useCreateNewThread.ts index 4901b9846..57ceeb385 100644 --- a/web/hooks/useCreateNewThread.ts +++ b/web/hooks/useCreateNewThread.ts @@ -82,24 +82,18 @@ export const useCreateNewThread = () => { } // Default context length is 8192 - const defaultContextLength = Math.min( - 8192, - defaultModel?.settings?.ctx_len ?? 8192 - ) + const contextLength = defaultModel?.settings?.ctx_len + ? Math.min(8192, defaultModel?.settings?.ctx_len) + : undefined const overriddenSettings = { - ctx_len: defaultModel?.settings?.ctx_len - ? Math.min(8192, defaultModel?.settings?.ctx_len) - : undefined, + ctx_len: contextLength, } // Use ctx length by default const overriddenParameters = { - max_tokens: defaultContextLength - ? Math.min( - defaultModel?.parameters?.max_tokens ?? 8192, - defaultContextLength - ) + max_tokens: contextLength + ? Math.min(defaultModel?.parameters?.max_tokens ?? 8192, contextLength) : defaultModel?.parameters?.max_tokens, } diff --git a/web/hooks/useFactoryReset.ts b/web/hooks/useFactoryReset.ts index da2e15b03..7344b2eb1 100644 --- a/web/hooks/useFactoryReset.ts +++ b/web/hooks/useFactoryReset.ts @@ -58,6 +58,7 @@ export default function useFactoryReset() { const configuration: AppConfiguration = { data_folder: defaultJanDataFolder, quick_ask: appConfiguration?.quick_ask ?? 
false, + distinct_id: appConfiguration?.distinct_id, } await window.core?.api?.updateAppConfiguration(configuration) } diff --git a/web/hooks/useGetSystemResources.test.ts b/web/hooks/useGetSystemResources.test.ts index 10e539e07..78392b612 100644 --- a/web/hooks/useGetSystemResources.test.ts +++ b/web/hooks/useGetSystemResources.test.ts @@ -21,7 +21,7 @@ jest.mock('jotai', () => ({ describe('useGetSystemResources', () => { const mockMonitoringExtension = { - getResourcesInfo: jest.fn(), + getHardware: jest.fn(), getCurrentLoad: jest.fn(), } @@ -38,17 +38,17 @@ describe('useGetSystemResources', () => { }) it('should fetch system resources on initial render', async () => { - mockMonitoringExtension.getResourcesInfo.mockResolvedValue({ - mem: { usedMemory: 4000, totalMemory: 8000 }, + mockMonitoringExtension.getHardware.mockResolvedValue({ + cpu: { usage: 50 }, + ram: { available: 4000, total: 8000 }, }) mockMonitoringExtension.getCurrentLoad.mockResolvedValue({ - cpu: { usage: 50 }, gpu: [], }) const { result } = renderHook(() => useGetSystemResources()) - expect(mockMonitoringExtension.getResourcesInfo).toHaveBeenCalledTimes(1) + expect(mockMonitoringExtension.getHardware).toHaveBeenCalledTimes(1) }) it('should start watching system resources when watch is called', () => { @@ -58,14 +58,14 @@ describe('useGetSystemResources', () => { result.current.watch() }) - expect(mockMonitoringExtension.getResourcesInfo).toHaveBeenCalled() + expect(mockMonitoringExtension.getHardware).toHaveBeenCalled() // Fast-forward time by 2 seconds act(() => { jest.advanceTimersByTime(2000) }) - expect(mockMonitoringExtension.getResourcesInfo).toHaveBeenCalled() + expect(mockMonitoringExtension.getHardware).toHaveBeenCalled() }) it('should stop watching when stopWatching is called', () => { @@ -85,7 +85,7 @@ describe('useGetSystemResources', () => { }) // Expect no additional calls after stopping - expect(mockMonitoringExtension.getResourcesInfo).toHaveBeenCalled() + expect(mockMonitoringExtension.getHardware).toHaveBeenCalled() }) it('should not fetch resources if monitoring extension is not available', async () => { @@ -97,7 +97,7 @@ describe('useGetSystemResources', () => { result.current.getSystemResources() }) - expect(mockMonitoringExtension.getResourcesInfo).not.toHaveBeenCalled() + expect(mockMonitoringExtension.getHardware).not.toHaveBeenCalled() expect(mockMonitoringExtension.getCurrentLoad).not.toHaveBeenCalled() }) }) diff --git a/web/hooks/useGetSystemResources.ts b/web/hooks/useGetSystemResources.ts index a05a6a710..e40100a55 100644 --- a/web/hooks/useGetSystemResources.ts +++ b/web/hooks/useGetSystemResources.ts @@ -1,6 +1,7 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ import { useCallback, useEffect, useState } from 'react' -import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core' +import { ExtensionTypeEnum, HardwareManagementExtension } from '@janhq/core' import { useSetAtom } from 'jotai' @@ -20,58 +21,62 @@ export default function useGetSystemResources() { NodeJS.Timeout | number | undefined >(undefined) - const setTotalRam = useSetAtom(totalRamAtom) const setGpus = useSetAtom(gpusAtom) - const setUsedRam = useSetAtom(usedRamAtom) const setCpuUsage = useSetAtom(cpuUsageAtom) const setTotalNvidiaVram = useSetAtom(nvidiaTotalVramAtom) const setAvailableVram = useSetAtom(availableVramAtom) + const setUsedRam = useSetAtom(usedRamAtom) + const setTotalRam = useSetAtom(totalRamAtom) const setRamUtilitized = useSetAtom(ramUtilitizedAtom) const getSystemResources = 
useCallback(async () => { if ( - !extensionManager.get( - ExtensionTypeEnum.SystemMonitoring + !extensionManager.get( + ExtensionTypeEnum.Hardware ) ) { return } - const monitoring = extensionManager.get( - ExtensionTypeEnum.SystemMonitoring - ) - const resourceInfor = await monitoring?.getResourcesInfo() - const currentLoadInfor = await monitoring?.getCurrentLoad() - if (resourceInfor?.mem?.usedMemory) setUsedRam(resourceInfor.mem.usedMemory) - if (resourceInfor?.mem?.totalMemory) - setTotalRam(resourceInfor.mem.totalMemory) + const hardwareExtension = extensionManager.get( + ExtensionTypeEnum.Hardware + ) + + const hardwareInfo = await hardwareExtension?.getHardware() + + const usedMemory = + Number(hardwareInfo?.ram.total) - Number(hardwareInfo?.ram.available) + + if (hardwareInfo?.ram?.total && hardwareInfo?.ram?.available) + setUsedRam(Number(usedMemory)) + + if (hardwareInfo?.ram?.total) setTotalRam(hardwareInfo.ram.total) const ramUtilitized = - ((resourceInfor?.mem?.usedMemory ?? 0) / - (resourceInfor?.mem?.totalMemory ?? 1)) * - 100 + ((Number(usedMemory) ?? 0) / (hardwareInfo?.ram.total ?? 1)) * 100 + setRamUtilitized(Math.round(ramUtilitized)) - setCpuUsage(Math.round(currentLoadInfor?.cpu?.usage ?? 0)) + setCpuUsage(Math.round(hardwareInfo?.cpu.usage ?? 0)) - const gpus = currentLoadInfor?.gpu ?? [] - setGpus(gpus) + const gpus = hardwareInfo?.gpus ?? [] + setGpus(gpus as any) let totalNvidiaVram = 0 if (gpus.length > 0) { totalNvidiaVram = gpus.reduce( - (total: number, gpu: { memoryTotal: string }) => - total + Number(gpu.memoryTotal), + (total: number, gpu: { total_vram: number }) => + total + Number(gpu.total_vram), 0 ) } + setTotalNvidiaVram(totalNvidiaVram) + setAvailableVram( - gpus.reduce( - (total: number, gpu: { memoryFree: string }) => - total + Number(gpu.memoryFree), - 0 - ) + gpus.reduce((total, gpu) => { + return total + Number(gpu.free_vram || 0) + }, 0) ) }, [ setUsedRam, diff --git a/web/hooks/useGpuSetting.test.ts b/web/hooks/useGpuSetting.test.ts deleted file mode 100644 index f52f07af8..000000000 --- a/web/hooks/useGpuSetting.test.ts +++ /dev/null @@ -1,87 +0,0 @@ -// useGpuSetting.test.ts - -import { renderHook, act } from '@testing-library/react' -import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core' - -// Mock dependencies -jest.mock('@/extension') - -import useGpuSetting from './useGpuSetting' -import { extensionManager } from '@/extension' - -describe('useGpuSetting', () => { - beforeEach(() => { - jest.clearAllMocks() - }) - - it('should return GPU settings when available', async () => { - const mockGpuSettings = { - gpuCount: 2, - gpuNames: ['NVIDIA GeForce RTX 3080', 'NVIDIA GeForce RTX 3070'], - totalMemory: 20000, - freeMemory: 15000, - } - - const mockMonitoringExtension: Partial = { - getGpuSetting: jest.fn().mockResolvedValue(mockGpuSettings), - } - - jest - .spyOn(extensionManager, 'get') - .mockReturnValue(mockMonitoringExtension as MonitoringExtension) - - const { result } = renderHook(() => useGpuSetting()) - - let gpuSettings - await act(async () => { - gpuSettings = await result.current.getGpuSettings() - }) - - expect(gpuSettings).toEqual(mockGpuSettings) - expect(extensionManager.get).toHaveBeenCalledWith( - ExtensionTypeEnum.SystemMonitoring - ) - expect(mockMonitoringExtension.getGpuSetting).toHaveBeenCalled() - }) - - it('should return undefined when no GPU settings are found', async () => { - const mockMonitoringExtension: Partial = { - getGpuSetting: jest.fn().mockResolvedValue(undefined), - } - - jest - 
.spyOn(extensionManager, 'get') - .mockReturnValue(mockMonitoringExtension as MonitoringExtension) - - const { result } = renderHook(() => useGpuSetting()) - - let gpuSettings - await act(async () => { - gpuSettings = await result.current.getGpuSettings() - }) - - expect(gpuSettings).toBeUndefined() - expect(extensionManager.get).toHaveBeenCalledWith( - ExtensionTypeEnum.SystemMonitoring - ) - expect(mockMonitoringExtension.getGpuSetting).toHaveBeenCalled() - }) - - it('should handle missing MonitoringExtension', async () => { - jest.spyOn(extensionManager, 'get').mockReturnValue(undefined) - jest.spyOn(console, 'debug').mockImplementation(() => {}) - - const { result } = renderHook(() => useGpuSetting()) - - let gpuSettings - await act(async () => { - gpuSettings = await result.current.getGpuSettings() - }) - - expect(gpuSettings).toBeUndefined() - expect(extensionManager.get).toHaveBeenCalledWith( - ExtensionTypeEnum.SystemMonitoring - ) - expect(console.debug).toHaveBeenCalledWith('No GPU setting found') - }) -}) diff --git a/web/hooks/useGpuSetting.ts b/web/hooks/useGpuSetting.ts deleted file mode 100644 index 36f51ed57..000000000 --- a/web/hooks/useGpuSetting.ts +++ /dev/null @@ -1,21 +0,0 @@ -import { useCallback } from 'react' - -import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core' - -import { extensionManager } from '@/extension' - -export default function useGpuSetting() { - const getGpuSettings = useCallback(async () => { - const gpuSetting = await extensionManager - ?.get(ExtensionTypeEnum.SystemMonitoring) - ?.getGpuSetting() - - if (!gpuSetting) { - console.debug('No GPU setting found') - return undefined - } - return gpuSetting - }, []) - - return { getGpuSettings } -} diff --git a/web/hooks/useHardwareManagement.ts b/web/hooks/useHardwareManagement.ts new file mode 100644 index 000000000..d39b3c1fc --- /dev/null +++ b/web/hooks/useHardwareManagement.ts @@ -0,0 +1,99 @@ +import { useMemo } from 'react' + +import { ExtensionTypeEnum, HardwareManagementExtension } from '@janhq/core' + +import { useSetAtom } from 'jotai' +import useSWR from 'swr' + +import { extensionManager } from '@/extension/ExtensionManager' +import { + cpuUsageAtom, + ramUtilitizedAtom, + totalRamAtom, + usedRamAtom, +} from '@/helpers/atoms/SystemBar.atom' + +// fetcher function +async function fetchExtensionData( + extension: HardwareManagementExtension | null, + method: (extension: HardwareManagementExtension) => Promise +): Promise { + if (!extension) { + throw new Error('Extension not found') + } + return method(extension) +} + +const getExtension = () => + extensionManager.get( + ExtensionTypeEnum.Hardware + ) ?? null + +/** + * @returns A Promise that resolves to the current hardware information. + */ +export function useGetHardwareInfo() { + const setCpuUsage = useSetAtom(cpuUsageAtom) + const setUsedRam = useSetAtom(usedRamAtom) + const setTotalRam = useSetAtom(totalRamAtom) + const setRamUtilitized = useSetAtom(ramUtilitizedAtom) + + const extension = useMemo( + () => + extensionManager.get( + ExtensionTypeEnum.Hardware + ) ?? null, + [] + ) + + const { + data: hardware, + error, + mutate, + } = useSWR( + extension ? 
'hardware' : null, + () => fetchExtensionData(extension, (ext) => ext.getHardware()), + { + revalidateOnFocus: false, + revalidateOnReconnect: false, + refreshInterval: 2000, + } + ) + + const usedMemory = + Number(hardware?.ram.total) - Number(hardware?.ram.available) + + if (hardware?.ram?.total && hardware?.ram?.available) + setUsedRam(Number(usedMemory)) + + if (hardware?.ram?.total) setTotalRam(hardware.ram.total) + + const ramUtilitized = + ((Number(usedMemory) ?? 0) / (hardware?.ram.total ?? 1)) * 100 + + setRamUtilitized(Math.round(ramUtilitized)) + + setCpuUsage(Math.round(hardware?.cpu.usage ?? 0)) + + return { hardware, error, mutate } +} + +/** + * Set active GPUs + * @returns A Promise that resolves when the active GPUs have been set. + */ +export const setActiveGpus = async (data: { gpus: number[] }) => { + const extension = getExtension() + + if (!extension) { + throw new Error('Extension is not available') + } + + try { + const response = await extension.setAvtiveGpu(data) + return response + } catch (error) { + console.error('Failed to set active GPUs:', error) + throw error + } +} diff --git a/web/hooks/useSendChatMessage.ts b/web/hooks/useSendChatMessage.ts index d3c8ff142..65124fcab 100644 --- a/web/hooks/useSendChatMessage.ts +++ b/web/hooks/useSendChatMessage.ts @@ -196,7 +196,10 @@ export default function useSendChatMessage() { } updateThread(updatedThread) - if (!isResend) { + if ( + !isResend && + (newMessage.content.length || newMessage.attachments?.length) + ) { // Add message const createdMessage = await extensionManager .get(ExtensionTypeEnum.Conversational) diff --git a/web/hooks/useSettings.ts b/web/hooks/useSettings.ts index 0f02d41af..a9635aa93 100644 --- a/web/hooks/useSettings.ts +++ b/web/hooks/useSettings.ts @@ -1,20 +1,10 @@ import { useCallback, useEffect, useState } from 'react' -import { fs, joinPath } from '@janhq/core' - -type NvidiaDriver = { - exist: boolean - version: string -} +import { fs, GpuSettingInfo, joinPath } from '@janhq/core' export type AppSettings = { - run_mode: 'cpu' | 'gpu' | undefined - notify: boolean - gpus_in_use: string[] vulkan: boolean - gpus: string[] - nvidia_driver: NvidiaDriver - cuda: NvidiaDriver + gpus: GpuSettingInfo[] } export const useSettings = () => { @@ -38,29 +28,16 @@ export const useSettings = () => { return {} }, []) - const saveSettings = async ({ - runMode, - notify, - gpusInUse, - vulkan, - }: { - runMode?: string | undefined - notify?: boolean | undefined - gpusInUse?: string[] | undefined - vulkan?: boolean | undefined - }) => { + const saveSettings = async ({ vulkan }: { vulkan?: boolean | undefined }) => { const settingsFile = await joinPath(['file://settings', 'settings.json']) const settings = await readSettings() - if (runMode != null) settings.run_mode = runMode - if (notify != null) settings.notify = notify - if (gpusInUse != null) settings.gpus_in_use = gpusInUse.filter((e) => !!e) if (vulkan != null) { settings.vulkan = vulkan // GPU enabled, set run_mode to 'gpu' if (settings.vulkan === true) { - settings.run_mode = 'gpu' - } else { - settings.run_mode = 'cpu' + settings?.gpus?.some((gpu: { activated: boolean }) => + gpu.activated === true ? 
'gpu' : 'cpu' + ) } } await fs.writeFileSync(settingsFile, JSON.stringify(settings)) diff --git a/web/package.json b/web/package.json index 13d433b3a..63dde8c05 100644 --- a/web/package.json +++ b/web/package.json @@ -14,6 +14,7 @@ "test": "jest" }, "dependencies": { + "@hello-pangea/dnd": "17.0.0", "@hookform/resolvers": "^3.9.1", "@janhq/core": "link:../core", "@janhq/joi": "link:../joi", @@ -29,7 +30,7 @@ "jotai": "^2.6.0", "katex": "^0.16.10", "lodash": "^4.17.21", - "lucide-react": "^0.291.0", + "lucide-react": "^0.311.0", "marked": "^9.1.2", "next": "14.2.3", "next-themes": "^0.2.1", @@ -57,7 +58,7 @@ "slate-react": "0.110.3", "swr": "^2.2.5", "tailwind-merge": "^2.0.0", - "tailwindcss": "3.3.5", + "tailwindcss": "3.4.17", "ulidx": "^2.3.0", "use-debounce": "^10.0.0", "uuid": "^9.0.1", diff --git a/web/public/images/ModelProvider/deepseek.svg b/web/public/images/ModelProvider/deepseek.svg new file mode 100644 index 000000000..6f4b775d3 --- /dev/null +++ b/web/public/images/ModelProvider/deepseek.svg @@ -0,0 +1,25 @@ + + + + +Created with Pixso. + + diff --git a/web/public/images/ModelProvider/google-gemini.svg b/web/public/images/ModelProvider/google-gemini.svg new file mode 100644 index 000000000..787c83710 --- /dev/null +++ b/web/public/images/ModelProvider/google-gemini.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/web/screens/Hub/ModelList/ModelHeader/index.tsx b/web/screens/Hub/ModelList/ModelHeader/index.tsx index fea3dd3a6..6315ea1fc 100644 --- a/web/screens/Hub/ModelList/ModelHeader/index.tsx +++ b/web/screens/Hub/ModelList/ModelHeader/index.tsx @@ -55,7 +55,7 @@ const ModelItemHeader = ({ model, onSelectedModel }: Props) => { // Default nvidia returns vram in MB, need to convert to bytes to match the unit of totalRamW let ram = nvidiaTotalVram * 1024 * 1024 - if (ram === 0 || settings?.run_mode === 'cpu') { + if (ram === 0 || settings?.gpus?.some((gpu) => gpu.activated !== true)) { ram = totalRam } const serverEnabled = useAtomValue(serverEnabledAtom) diff --git a/web/screens/Hub/ModelPage/index.tsx b/web/screens/Hub/ModelPage/index.tsx index dd551c96d..d46d18ca4 100644 --- a/web/screens/Hub/ModelPage/index.tsx +++ b/web/screens/Hub/ModelPage/index.tsx @@ -131,10 +131,10 @@ const ModelPage = ({ model, onGoBack }: Props) => { {model.type !== 'cloud' && ( <> - + Format - + Size diff --git a/web/screens/LocalServer/LocalServerRightPanel/index.tsx b/web/screens/LocalServer/LocalServerRightPanel/index.tsx index 900a8128e..02e51868d 100644 --- a/web/screens/LocalServer/LocalServerRightPanel/index.tsx +++ b/web/screens/LocalServer/LocalServerRightPanel/index.tsx @@ -124,16 +124,20 @@ const LocalServerRightPanel = () => { clipboard.copy(selectedModel?.id) }} suffixIcon={ - clipboard.copied ? ( - + selectedModel ? ( + clipboard.copied ? ( + + ) : ( + + ) ) : ( - + <> ) } /> diff --git a/web/screens/Settings/Advanced/FactoryReset/ModalConfirmReset.tsx b/web/screens/Settings/Advanced/FactoryReset/ModalConfirmReset.tsx index c59163735..207a11ee6 100644 --- a/web/screens/Settings/Advanced/FactoryReset/ModalConfirmReset.tsx +++ b/web/screens/Settings/Advanced/FactoryReset/ModalConfirmReset.tsx @@ -30,7 +30,7 @@ const ModalConfirmReset = () => { content={

    - Restore appplication to its initial state, erasing all models and + Restore application to its initial state, erasing all models and chat history. This action is irreversible and recommended only if the application is corrupted.

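The confirmation copy above pairs with the useFactoryReset change earlier in this diff, where the rebuilt AppConfiguration now carries distinct_id forward so the analytics identity survives a reset. A minimal sketch of that flow in TypeScript, assuming the AppConfiguration shape used in this diff; reading the current configuration back (here via a hypothetical getAppConfigurations counterpart to the updateAppConfiguration call shown in the hook) is this sketch's assumption:

// Sketch only: field names mirror the useFactoryReset hunk in this diff.
type AppConfiguration = {
  data_folder: string
  quick_ask: boolean
  distinct_id?: string
}

function buildResetConfiguration(
  defaultJanDataFolder: string,
  current?: AppConfiguration
): AppConfiguration {
  return {
    data_folder: defaultJanDataFolder, // point back at the default data folder
    quick_ask: current?.quick_ask ?? false, // preserve the user's Quick Ask toggle
    distinct_id: current?.distinct_id, // keep the analytics id across resets
  }
}

Without the distinct_id line, a factory reset would mint a fresh identity and the same installation would be counted twice.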
    diff --git a/web/screens/Settings/Advanced/FactoryReset/index.tsx b/web/screens/Settings/Advanced/FactoryReset/index.tsx index f688e0a61..72642fafe 100644 --- a/web/screens/Settings/Advanced/FactoryReset/index.tsx +++ b/web/screens/Settings/Advanced/FactoryReset/index.tsx @@ -17,7 +17,7 @@ const FactoryReset = () => {

    - Restore appplication to its initial state, erasing all models and chat + Restore application to its initial state, erasing all models and chat history. This action is irreversible and recommended only if the application is corrupted.

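The Advanced settings diff that follows removes the manual run_mode toggle and GPU picker; across this changeset the CPU/GPU decision is instead derived from the per-GPU activated flag (see the ModelLabel and ModelHeader hunks above). A hedged sketch of that derivation: activated, total_vram, and free_vram appear in this diff, while the remaining fields and the MB units are this sketch's assumptions.

// Sketch only: mirrors the settings?.gpus?.some((gpu) => gpu.activated) checks.
type GpuSettingInfo = {
  id: string // assumed identifier field
  name: string // assumed display name
  activated: boolean
  total_vram: number // MB, per the VRAM handling in useGetSystemResources
  free_vram: number // MB
}

// At least one activated GPU replaces the old run_mode === 'gpu' test.
const isGpuMode = (gpus: GpuSettingInfo[] = []): boolean =>
  gpus.some((gpu) => gpu.activated)

// Memory budget backing the "not enough RAM/VRAM" labels: free VRAM across
// activated GPUs in GPU mode, otherwise free system RAM. Returns bytes.
function availableMemoryBytes(
  gpus: GpuSettingInfo[],
  totalRamBytes: number,
  usedRamBytes: number
): number {
  if (isGpuMode(gpus)) {
    const freeVramMb = gpus
      .filter((gpu) => gpu.activated)
      .reduce((total, gpu) => total + Number(gpu.free_vram || 0), 0)
    return freeVramMb * 1000000 // decimal MB-to-bytes, matching the existing code
  }
  return totalRamBytes - usedRamBytes
}

One consequence of inferring the mode rather than storing it: every consumer must agree on the same some(activated) predicate, which is why the identical expression recurs in ModelLabel, ModelHeader, and useSettings.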
    diff --git a/web/screens/Settings/Advanced/index.tsx b/web/screens/Settings/Advanced/index.tsx index 66240c028..cb10a1778 100644 --- a/web/screens/Settings/Advanced/index.tsx +++ b/web/screens/Settings/Advanced/index.tsx @@ -1,32 +1,19 @@ 'use client' -import { useEffect, useState, ChangeEvent } from 'react' +import { ChangeEvent } from 'react' -import { openExternalUrl, AppConfiguration } from '@janhq/core' +import { AppConfiguration } from '@janhq/core' -import { - ScrollArea, - Switch, - Input, - Tooltip, - Checkbox, - useClickOutside, - Button, -} from '@janhq/joi' +import { ScrollArea, Switch, Button } from '@janhq/joi' import { useAtom, useAtomValue, useSetAtom } from 'jotai' -import { ChevronDownIcon, ArrowRightIcon } from 'lucide-react' -import { AlertTriangleIcon, AlertCircleIcon } from 'lucide-react' - -import { twMerge } from 'tailwind-merge' +import { ArrowRightIcon } from 'lucide-react' import { useDebouncedCallback } from 'use-debounce' -import { snackbar, toaster } from '@/containers/Toast' +import { toaster } from '@/containers/Toast' -import { useActiveModel } from '@/hooks/useActiveModel' import { useConfigurations } from '@/hooks/useConfigurations' -import { useSettings } from '@/hooks/useSettings' import ModalDeleteAllThreads from '@/screens/Thread/ThreadLeftPanel/ModalDeleteAllThreads' @@ -36,7 +23,6 @@ import FactoryReset from './FactoryReset' import { experimentalFeatureEnabledAtom, proxyEnabledAtom, - vulkanEnabledAtom, quickAskEnabledAtom, } from '@/helpers/atoms/AppConfig.atom' @@ -44,12 +30,6 @@ import { ThreadModalAction } from '@/helpers/atoms/Thread.atom' import { modalActionThreadAtom } from '@/helpers/atoms/Thread.atom' -type GPU = { - id: string - vram: number | null - name: string -} - /** * Advanced Settings Screen * @returns @@ -58,31 +38,14 @@ const Advanced = ({ setSubdir }: { setSubdir: (subdir: string) => void }) => { const [experimentalEnabled, setExperimentalEnabled] = useAtom( experimentalFeatureEnabledAtom ) - const [vulkanEnabled, setVulkanEnabled] = useAtom(vulkanEnabledAtom) + const [proxyEnabled, setProxyEnabled] = useAtom(proxyEnabledAtom) const quickAskEnabled = useAtomValue(quickAskEnabledAtom) - const [gpuEnabled, setGpuEnabled] = useState(false) - const [gpuList, setGpuList] = useState([]) - const [gpusInUse, setGpusInUse] = useState([]) - const [dropdownOptions, setDropdownOptions] = useState( - null - ) const { configurePullOptions } = useConfigurations() - const [toggle, setToggle] = useState(null) - - const { readSettings, saveSettings } = useSettings() - const { stopModel } = useActiveModel() - const [open, setOpen] = useState(false) const setModalActionThread = useSetAtom(modalActionThreadAtom) - const selectedGpu = gpuList - .filter((x) => gpusInUse.includes(x.id)) - .map((y) => { - return y['name'] - }) - /** * There could be a case where the state update is not synced * so that retrieving state value from other hooks would not be accurate @@ -110,24 +73,6 @@ const Advanced = ({ setSubdir }: { setSubdir: (subdir: string) => void }) => { if (relaunch) window.core?.api?.relaunch() } - /** - * Update Vulkan Enabled - * @param e - * @param relaunch - * @returns void - */ - const updateVulkanEnabled = async (e: boolean, relaunch: boolean = true) => { - toaster({ - title: 'Reload', - description: 'Vulkan settings updated. 
Reload now to apply the changes.', - }) - stopModel() - setVulkanEnabled(e) - await saveSettings({ vulkan: e, gpusInUse: [] }) - // Relaunch to apply settings - if (relaunch) window.location.reload() - } - /** * Update Experimental Enabled * @param e @@ -143,71 +88,11 @@ const Advanced = ({ setSubdir }: { setSubdir: (subdir: string) => void }) => { if (e.target.checked) return // It affects other settings, so we need to reset them - const isRelaunch = quickAskEnabled || vulkanEnabled + const isRelaunch = quickAskEnabled if (quickAskEnabled) await updateQuickAskEnabled(false, false) - if (vulkanEnabled) await updateVulkanEnabled(false, false) if (isRelaunch) window.core?.api?.relaunch() } - /** - * useEffect to set GPU enabled if possible - */ - useEffect(() => { - const setUseGpuIfPossible = async () => { - const settings = await readSettings() - setGpuEnabled(settings.run_mode === 'gpu' && settings.gpus?.length > 0) - setGpusInUse(settings.gpus_in_use || []) - setVulkanEnabled(settings.vulkan || false) - if (settings.gpus) { - setGpuList(settings.gpus) - } - } - setUseGpuIfPossible() - }, [readSettings, setGpuList, setGpuEnabled, setGpusInUse, setVulkanEnabled]) - - /** - * Handle GPU Change - * @param gpuId - * @returns - */ - const handleGPUChange = async (gpuId: string) => { - let updatedGpusInUse = [...gpusInUse] - if (updatedGpusInUse.includes(gpuId)) { - updatedGpusInUse = updatedGpusInUse.filter((id) => id !== gpuId) - if ( - gpuEnabled && - updatedGpusInUse.length === 0 && - gpuId && - gpuId.trim() - ) { - // Vulkan support only allow 1 active device at a time - if (vulkanEnabled) { - updatedGpusInUse = [] - } - updatedGpusInUse.push(gpuId) - } - } else { - // Vulkan support only allow 1 active device at a time - if (vulkanEnabled) { - updatedGpusInUse = [] - } - if (gpuId && gpuId.trim()) updatedGpusInUse.push(gpuId) - } - setGpusInUse(updatedGpusInUse) - await saveSettings({ gpusInUse: updatedGpusInUse.filter((e) => !!e) }) - // Reload window to apply changes - // This will trigger engine servers to restart - window.location.reload() - } - - const gpuSelectionPlaceHolder = - gpuList.length > 0 ? 'Select GPU' : "You don't have any compatible GPU" - - /** - * Handle click outside - */ - useClickOutside(() => setOpen(false), null, [dropdownOptions, toggle]) - return (
    @@ -231,201 +116,6 @@ const Advanced = ({ setSubdir }: { setSubdir: (subdir: string) => void }) => {
    - {/* CPU / GPU switching */} - {!isMac && ( -
    -
    -
    -
    -
    GPU Acceleration
    -
    -

    - Enable to enhance model performance by utilizing your GPU - devices for acceleration. Read{' '} - - {' '} - - openExternalUrl( - 'https://jan.ai/guides/troubleshooting/gpu-not-used/' - ) - } - > - troubleshooting guide - {' '} - {' '} - for further assistance. -

    -
    - -
    - {gpuList.length > 0 && !gpuEnabled && ( - - } - content="Disabling NVIDIA GPU Acceleration may result in reduced - performance. It is recommended to keep this enabled for - optimal user experience." - /> - )} - { - if (e.target.checked === true) { - saveSettings({ runMode: 'gpu' }) - setGpuEnabled(true) - snackbar({ - description: - 'Successfully turned on GPU Acceleration', - type: 'success', - }) - } else { - saveSettings({ runMode: 'cpu' }) - setGpuEnabled(false) - snackbar({ - description: - 'Successfully turned off GPU Acceleration', - type: 'success', - }) - } - // Stop any running model to apply the changes - if (e.target.checked !== gpuEnabled) { - stopModel().finally(() => { - setTimeout(() => { - window.location.reload() - }, 300) - }) - } - }} - /> - } - content="Your current device does not have a compatible GPU for - monitoring. To enable GPU monitoring, please ensure your - device has a supported Nvidia or AMD GPU with updated - drivers." - disabled={gpuList.length > 0} - /> -
    -
    - -
    - -
    - - } - onClick={() => setOpen(!open)} - /> - {gpuList.length > 0 && ( -
    -
    -

    - {vulkanEnabled ? 'Vulkan Supported GPUs' : 'Nvidia'} -

    -
    -
    - {gpuList - .filter((gpu) => - vulkanEnabled - ? gpu.name - : gpu.name?.toLowerCase().includes('nvidia') - ) - .map((gpu) => ( -
    - handleGPUChange(gpu.id)} - label={ - - {gpu.name} - {!vulkanEnabled && ( - {gpu.vram}MB VRAM - )} - - } - /> -
    - ))} -
    - {gpuEnabled && gpusInUse.length > 1 && ( -
    - -

    - If multi-GPU is enabled with different GPU models - or without NVLink, it could impact token speed. -

    -
    - )} -
    -
    -
    - )} -
    -
    -
    - )} - - {/* Vulkan for AMD GPU/ APU and Intel Arc GPU */} - {!isMac && experimentalEnabled && ( -
    -
    -
    -
    Vulkan Support
    -
    -

    - Enable Vulkan with AMD GPU/APU and Intel Arc GPU for better - model performance (reload needed). -

    -
    -
    - updateVulkanEnabled(e.target.checked)} - /> -
    -
    - )} - {/* Proxy Settings Link */} @@ -436,7 +126,7 @@ const Advanced = ({ setSubdir }: { setSubdir: (subdir: string) => void }) => {
    HTTPS Proxy

    - Optional proxy server for internet connections + Optional proxy server for internet connections.

    @@ -461,11 +151,11 @@ const Advanced = ({ setSubdir }: { setSubdir: (subdir: string) => void }) => {
    Jan Quick Ask

    - Enable Quick Ask to be triggered via the default hotkey . + Enable Quick Ask to be triggered via the default hotkey {isMac ? '⌘' : 'Ctrl'} + J {' '} - (reload needed). + .

    {
    - {coreActiveExtensions.length > 0 && ( -
    -
    - Core Extension -
    -
    - )} {coreActiveExtensions .filter((x) => x.name.includes(searchText.toLowerCase().trim())) .sort((a, b) => a.name.localeCompare(b.name)) diff --git a/web/screens/Settings/Engines/DeleteEngineVariant.tsx b/web/screens/Settings/Engines/DeleteEngineVariant.tsx index 1033164e6..d21dac3d8 100644 --- a/web/screens/Settings/Engines/DeleteEngineVariant.tsx +++ b/web/screens/Settings/Engines/DeleteEngineVariant.tsx @@ -25,7 +25,7 @@ const DeleteEngineVariant = ({ return ( Delete {variant.name}} + title={Delete Variant} open={open} onOpenChange={() => setOpen(!open)} trigger={ @@ -39,7 +39,8 @@ const DeleteEngineVariant = ({ content={

    - Are you sure you want to delete this variant? + Are you sure you want to delete {variant.name}? This action cannot + be undone.

    {
    - onSwitchChange(engine)} - /> + {engine !== InferenceEngine.cortex_llamacpp && ( + onSwitchChange(engine)} + /> + )} - ) : ( - - )} - - )} + ) ?? 0 + )} + +
    + + ) : ( + + )} + + )} +
    -
    - ) - })} + ) + })} diff --git a/web/screens/Settings/Engines/ModalAddModel.tsx b/web/screens/Settings/Engines/ModalAddModel.tsx index 40c986e92..1fbdabb6a 100644 --- a/web/screens/Settings/Engines/ModalAddModel.tsx +++ b/web/screens/Settings/Engines/ModalAddModel.tsx @@ -10,7 +10,7 @@ import { InferenceEngine, Model } from '@janhq/core' import { Button, Input, Modal } from '@janhq/joi' import { useAtomValue } from 'jotai' -import { PlusIcon } from 'lucide-react' +import { PlusIcon, ArrowUpRightFromSquare } from 'lucide-react' import { z } from 'zod' @@ -71,7 +71,7 @@ const ModelAddModel = ({ engine }: { engine: string }) => { {prefix} {label}
    -

    +

    {desc} {isRequired && *}

    @@ -97,7 +97,7 @@ const ModelAddModel = ({ engine }: { engine: string }) => { className="w-[500px]" content={
    -
    +