diff --git a/.github/workflows/jan-electron-build-beta.yml b/.github/workflows/jan-electron-build-beta.yml
index 9cae31d67..4f9886010 100644
--- a/.github/workflows/jan-electron-build-beta.yml
+++ b/.github/workflows/jan-electron-build-beta.yml
@@ -9,31 +9,6 @@ jobs:
get-update-version:
uses: ./.github/workflows/template-get-update-version.yml
- create-draft-release:
- runs-on: ubuntu-latest
- if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
- outputs:
- upload_url: ${{ steps.create_release.outputs.upload_url }}
- version: ${{ steps.get_version.outputs.version }}
- permissions:
- contents: write
- steps:
- - name: Extract tag name without v prefix
- id: get_version
- run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV && echo "::set-output name=version::${GITHUB_REF#refs/tags/v}"
- env:
- GITHUB_REF: ${{ github.ref }}
- - name: Create Draft Release
- id: create_release
- uses: softprops/action-gh-release@v2
- with:
- tag_name: ${{ github.ref_name }}
- token: ${{ secrets.GITHUB_TOKEN }}
- name: "${{ env.VERSION }}"
- draft: true
- prerelease: false
- generate_release_notes: true
-
build-macos:
uses: ./.github/workflows/template-build-macos.yml
secrets: inherit
@@ -65,7 +40,7 @@ jobs:
beta: true
sync-temp-to-latest:
- needs: [build-macos, create-draft-release, build-windows-x64, build-linux-x64]
+ needs: [build-macos, build-windows-x64, build-linux-x64]
runs-on: ubuntu-latest
permissions:
contents: write
@@ -82,19 +57,15 @@ jobs:
AWS_DEFAULT_REGION: ${{ secrets.DELTA_AWS_REGION }}
AWS_EC2_METADATA_DISABLED: "true"
- - name: set release to prerelease
- run: |
- gh release edit v${{ needs.create-draft-release.outputs.version }} --draft=false --prerelease
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
noti-discord-and-update-url-readme:
- needs: [build-macos, create-draft-release, build-windows-x64, build-linux-x64, sync-temp-to-latest]
+ needs: [build-macos, get-update-version, build-windows-x64, build-linux-x64, sync-temp-to-latest]
runs-on: ubuntu-latest
steps:
- name: Set version to environment variable
run: |
- echo "VERSION=${{ needs.create-draft-release.outputs.version }}" >> $GITHUB_ENV
+ VERSION=${{ needs.get-update-version.outputs.new_version }}
+ VERSION="${VERSION#v}"
+ echo "VERSION=$VERSION" >> $GITHUB_ENV
- name: Notify Discord
uses: Ilshidur/action-discord@master
@@ -105,6 +76,5 @@ jobs:
- macOS Universal: https://delta.jan.ai/beta/jan-beta-mac-universal-{{ VERSION }}.dmg
- Linux Deb: https://delta.jan.ai/beta/jan-beta-linux-amd64-{{ VERSION }}.deb
- Linux AppImage: https://delta.jan.ai/beta/jan-beta-linux-x86_64-{{ VERSION }}.AppImage
- - Github Release URL: https://github.com/janhq/jan/releases/tag/v{{ VERSION }}
env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_JAN_BETA }}
\ No newline at end of file
diff --git a/.github/workflows/template-build-jan-server.yml b/.github/workflows/template-build-jan-server.yml
deleted file mode 100644
index 9bb772605..000000000
--- a/.github/workflows/template-build-jan-server.yml
+++ /dev/null
@@ -1,39 +0,0 @@
-name: build-jan-server
-on:
- workflow_call:
- inputs:
- dockerfile_path:
- required: false
- type: string
- default: './Dockerfile'
- docker_image_tag:
- required: true
- type: string
- default: 'ghcr.io/janhq/jan-server:dev-latest'
-
-jobs:
- build:
- runs-on: ubuntu-latest
- env:
- REGISTRY: ghcr.io
- IMAGE_NAME: janhq/jan-server
- permissions:
- packages: write
- steps:
- - name: Checkout
- uses: actions/checkout@v4
-
- - name: Log in to the Container registry
- uses: docker/login-action@v3
- with:
- registry: ${{ env.REGISTRY }}
- username: ${{ github.actor }}
- password: ${{ secrets.GITHUB_TOKEN }}
-
- - name: Build and push Docker image
- uses: docker/build-push-action@v3
- with:
- context: .
- file: ${{ inputs.dockerfile_path }}
- push: true
- tags: ${{ inputs.docker_image_tag }}
\ No newline at end of file
diff --git a/.github/workflows/template-build-linux-x64.yml b/.github/workflows/template-build-linux-x64.yml
index 85b050e62..68079d0dc 100644
--- a/.github/workflows/template-build-linux-x64.yml
+++ b/.github/workflows/template-build-linux-x64.yml
@@ -83,7 +83,7 @@ jobs:
cat ./electron/package.json
echo "------------------------"
cat ./package.json
- jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
+ jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
mv /tmp/package.json electron/package.json
cat electron/package.json
diff --git a/.github/workflows/template-build-macos.yml b/.github/workflows/template-build-macos.yml
index 2eabd9ce2..6bc38f786 100644
--- a/.github/workflows/template-build-macos.yml
+++ b/.github/workflows/template-build-macos.yml
@@ -99,7 +99,7 @@ jobs:
cat ./electron/package.json
echo "------------------------"
cat ./package.json
- jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
+ jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
mv /tmp/package.json electron/package.json
cat electron/package.json
diff --git a/.github/workflows/template-build-windows-x64.yml b/.github/workflows/template-build-windows-x64.yml
index a317b4960..9e3171e03 100644
--- a/.github/workflows/template-build-windows-x64.yml
+++ b/.github/workflows/template-build-windows-x64.yml
@@ -108,7 +108,7 @@ jobs:
cat ./package.json
echo "------------------------"
cat ./electron/scripts/uninstaller.nsh
- jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
+ jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
mv /tmp/package.json electron/package.json
cat electron/package.json
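
Note: all three desktop build templates (Linux, macOS, Windows) now publish only to the generic delta.jan.ai endpoint and the S3 temp-beta path; the GitHub provider is dropped. A minimal sketch of the resulting electron-builder `build.publish` value, with illustrative placeholders where CI injects repository secrets:

// Sketch of build.publish after the jq edit above; bucket and region are
// placeholders for values injected from repository secrets in CI.
const publish = [
  { provider: 'generic', url: 'https://delta.jan.ai/beta', channel: 'beta' },
  {
    provider: 's3',
    acl: null,
    bucket: 'DELTA_AWS_S3_BUCKET_NAME', // placeholder
    region: 'DELTA_AWS_REGION', // placeholder
    path: 'temp-beta',
    channel: 'beta',
  },
]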
diff --git a/core/src/browser/extension.ts b/core/src/browser/extension.ts
index 1d641980b..d768473c9 100644
--- a/core/src/browser/extension.ts
+++ b/core/src/browser/extension.ts
@@ -12,6 +12,7 @@ export enum ExtensionTypeEnum {
SystemMonitoring = 'systemMonitoring',
HuggingFace = 'huggingFace',
Engine = 'engine',
+ Hardware = 'hardware',
}
export interface ExtensionType {
diff --git a/core/src/browser/extensions/engines/OAIEngine.test.ts b/core/src/browser/extensions/engines/OAIEngine.test.ts
index 81348786c..66537d0be 100644
--- a/core/src/browser/extensions/engines/OAIEngine.test.ts
+++ b/core/src/browser/extensions/engines/OAIEngine.test.ts
@@ -38,8 +38,14 @@ describe('OAIEngine', () => {
it('should subscribe to events on load', () => {
engine.onLoad()
- expect(events.on).toHaveBeenCalledWith(MessageEvent.OnMessageSent, expect.any(Function))
- expect(events.on).toHaveBeenCalledWith(InferenceEvent.OnInferenceStopped, expect.any(Function))
+ expect(events.on).toHaveBeenCalledWith(
+ MessageEvent.OnMessageSent,
+ expect.any(Function)
+ )
+ expect(events.on).toHaveBeenCalledWith(
+ InferenceEvent.OnInferenceStopped,
+ expect.any(Function)
+ )
})
it('should handle inference request', async () => {
@@ -77,7 +83,12 @@ describe('OAIEngine', () => {
expect(events.emit).toHaveBeenCalledWith(
MessageEvent.OnMessageUpdate,
expect.objectContaining({
- content: [{ type: ContentType.Text, text: { value: 'test response', annotations: [] } }],
+ content: [
+ {
+ type: ContentType.Text,
+ text: { value: 'test response', annotations: [] },
+ },
+ ],
status: MessageStatus.Ready,
})
)
@@ -101,11 +112,10 @@ describe('OAIEngine', () => {
await engine.inference(data)
- expect(events.emit).toHaveBeenCalledWith(
+ expect(events.emit).toHaveBeenLastCalledWith(
MessageEvent.OnMessageUpdate,
expect.objectContaining({
- content: [{ type: ContentType.Text, text: { value: 'test error', annotations: [] } }],
- status: MessageStatus.Error,
+      status: MessageStatus.Error,
error_code: 500,
})
)
diff --git a/core/src/browser/extensions/engines/OAIEngine.ts b/core/src/browser/extensions/engines/OAIEngine.ts
index 6b4c20a19..61032357c 100644
--- a/core/src/browser/extensions/engines/OAIEngine.ts
+++ b/core/src/browser/extensions/engines/OAIEngine.ts
@@ -42,7 +42,9 @@ export abstract class OAIEngine extends AIEngine {
*/
override onLoad() {
super.onLoad()
- events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => this.inference(data))
+ events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
+ this.inference(data)
+ )
events.on(InferenceEvent.OnInferenceStopped, () => this.stopInference())
}
@@ -128,7 +130,9 @@ export abstract class OAIEngine extends AIEngine {
events.emit(MessageEvent.OnMessageUpdate, message)
},
complete: async () => {
- message.status = message.content.length ? MessageStatus.Ready : MessageStatus.Error
+ message.status = message.content.length
+ ? MessageStatus.Ready
+ : MessageStatus.Error
events.emit(MessageEvent.OnMessageUpdate, message)
},
error: async (err: any) => {
@@ -141,7 +145,10 @@ export abstract class OAIEngine extends AIEngine {
message.content[0] = {
type: ContentType.Text,
text: {
- value: err.message,
+ value:
+              typeof err.message === 'string'
+ ? err.message
+ : (JSON.stringify(err.message) ?? err.detail),
annotations: [],
},
}
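
Note: the error branch above now tolerates structured error payloads instead of assuming `err.message` is a plain string. A minimal sketch of that normalization in isolation; the helper name is hypothetical:

// Hypothetical helper mirroring the ternary above: string messages pass
// through, structured ones are stringified, and `err.detail` is the fallback.
const errorText = (err: any): string =>
  typeof err.message === 'string'
    ? err.message
    : (JSON.stringify(err.message) ?? err.detail)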
diff --git a/core/src/browser/extensions/engines/helpers/sse.test.ts b/core/src/browser/extensions/engines/helpers/sse.test.ts
index 0b78aa9b5..f8c2ac6b4 100644
--- a/core/src/browser/extensions/engines/helpers/sse.test.ts
+++ b/core/src/browser/extensions/engines/helpers/sse.test.ts
@@ -1,14 +1,17 @@
import { lastValueFrom, Observable } from 'rxjs'
import { requestInference } from './sse'
-import { ReadableStream } from 'stream/web';
+import { ReadableStream } from 'stream/web'
describe('requestInference', () => {
it('should send a request to the inference server and return an Observable', () => {
// Mock the fetch function
const mockFetch: any = jest.fn(() =>
Promise.resolve({
ok: true,
- json: () => Promise.resolve({ choices: [{ message: { content: 'Generated response' } }] }),
+ json: () =>
+ Promise.resolve({
+ choices: [{ message: { content: 'Generated response' } }],
+ }),
headers: new Headers(),
redirected: false,
status: 200,
@@ -36,7 +39,10 @@ describe('requestInference', () => {
const mockFetch: any = jest.fn(() =>
Promise.resolve({
ok: false,
- json: () => Promise.resolve({ error: { message: 'Wrong API Key', code: 'invalid_api_key' } }),
+ json: () =>
+ Promise.resolve({
+ error: { message: 'Invalid API Key.', code: 'invalid_api_key' },
+ }),
headers: new Headers(),
redirected: false,
status: 401,
@@ -56,69 +62,85 @@ describe('requestInference', () => {
// Assert the expected behavior
expect(result).toBeInstanceOf(Observable)
- expect(lastValueFrom(result)).rejects.toEqual({ message: 'Wrong API Key', code: 'invalid_api_key' })
+ expect(lastValueFrom(result)).rejects.toEqual({
+ message: 'Invalid API Key.',
+ code: 'invalid_api_key',
+ })
})
})
- it('should handle a successful response with a transformResponse function', () => {
- // Mock the fetch function
- const mockFetch: any = jest.fn(() =>
- Promise.resolve({
- ok: true,
- json: () => Promise.resolve({ choices: [{ message: { content: 'Generated response' } }] }),
- headers: new Headers(),
- redirected: false,
- status: 200,
- statusText: 'OK',
- })
- )
- jest.spyOn(global, 'fetch').mockImplementation(mockFetch)
-
- // Define the test inputs
- const inferenceUrl = 'https://inference-server.com'
- const requestBody = { message: 'Hello' }
- const model = { id: 'model-id', parameters: { stream: false } }
- const transformResponse = (data: any) => data.choices[0].message.content.toUpperCase()
-
- // Call the function
- const result = requestInference(inferenceUrl, requestBody, model, undefined, undefined, transformResponse)
-
- // Assert the expected behavior
- expect(result).toBeInstanceOf(Observable)
- expect(lastValueFrom(result)).resolves.toEqual('GENERATED RESPONSE')
- })
-
-
- it('should handle a successful response with streaming enabled', () => {
- // Mock the fetch function
- const mockFetch: any = jest.fn(() =>
- Promise.resolve({
- ok: true,
- body: new ReadableStream({
- start(controller) {
- controller.enqueue(new TextEncoder().encode('data: {"choices": [{"delta": {"content": "Streamed"}}]}'));
- controller.enqueue(new TextEncoder().encode('data: [DONE]'));
- controller.close();
- }
+it('should handle a successful response with a transformResponse function', () => {
+ // Mock the fetch function
+ const mockFetch: any = jest.fn(() =>
+ Promise.resolve({
+ ok: true,
+ json: () =>
+ Promise.resolve({
+ choices: [{ message: { content: 'Generated response' } }],
}),
- headers: new Headers(),
- redirected: false,
- status: 200,
- statusText: 'OK',
- })
- );
- jest.spyOn(global, 'fetch').mockImplementation(mockFetch);
-
- // Define the test inputs
- const inferenceUrl = 'https://inference-server.com';
- const requestBody = { message: 'Hello' };
- const model = { id: 'model-id', parameters: { stream: true } };
-
- // Call the function
- const result = requestInference(inferenceUrl, requestBody, model);
-
- // Assert the expected behavior
- expect(result).toBeInstanceOf(Observable);
- expect(lastValueFrom(result)).resolves.toEqual('Streamed');
- });
+ headers: new Headers(),
+ redirected: false,
+ status: 200,
+ statusText: 'OK',
+ })
+ )
+ jest.spyOn(global, 'fetch').mockImplementation(mockFetch)
+ // Define the test inputs
+ const inferenceUrl = 'https://inference-server.com'
+ const requestBody = { message: 'Hello' }
+ const model = { id: 'model-id', parameters: { stream: false } }
+ const transformResponse = (data: any) =>
+ data.choices[0].message.content.toUpperCase()
+
+ // Call the function
+ const result = requestInference(
+ inferenceUrl,
+ requestBody,
+ model,
+ undefined,
+ undefined,
+ transformResponse
+ )
+
+ // Assert the expected behavior
+ expect(result).toBeInstanceOf(Observable)
+ expect(lastValueFrom(result)).resolves.toEqual('GENERATED RESPONSE')
+})
+
+it('should handle a successful response with streaming enabled', () => {
+ // Mock the fetch function
+ const mockFetch: any = jest.fn(() =>
+ Promise.resolve({
+ ok: true,
+ body: new ReadableStream({
+ start(controller) {
+ controller.enqueue(
+ new TextEncoder().encode(
+ 'data: {"choices": [{"delta": {"content": "Streamed"}}]}'
+ )
+ )
+ controller.enqueue(new TextEncoder().encode('data: [DONE]'))
+ controller.close()
+ },
+ }),
+ headers: new Headers(),
+ redirected: false,
+ status: 200,
+ statusText: 'OK',
+ })
+ )
+ jest.spyOn(global, 'fetch').mockImplementation(mockFetch)
+
+ // Define the test inputs
+ const inferenceUrl = 'https://inference-server.com'
+ const requestBody = { message: 'Hello' }
+ const model = { id: 'model-id', parameters: { stream: true } }
+
+ // Call the function
+ const result = requestInference(inferenceUrl, requestBody, model)
+
+ // Assert the expected behavior
+ expect(result).toBeInstanceOf(Observable)
+ expect(lastValueFrom(result)).resolves.toEqual('Streamed')
+})
diff --git a/core/src/browser/extensions/engines/helpers/sse.ts b/core/src/browser/extensions/engines/helpers/sse.ts
index 55cde56b4..5c63008ff 100644
--- a/core/src/browser/extensions/engines/helpers/sse.ts
+++ b/core/src/browser/extensions/engines/helpers/sse.ts
@@ -32,20 +32,19 @@ export function requestInference(
})
.then(async (response) => {
if (!response.ok) {
- const data = await response.json()
- let errorCode = ErrorCode.Unknown
- if (data.error) {
- errorCode = data.error.code ?? data.error.type ?? ErrorCode.Unknown
- } else if (response.status === 401) {
- errorCode = ErrorCode.InvalidApiKey
+ if (response.status === 401) {
+ throw {
+ code: ErrorCode.InvalidApiKey,
+ message: 'Invalid API Key.',
+ }
}
- const error = {
- message: data.error?.message ?? data.message ?? 'Error occurred.',
- code: errorCode,
+        const data = await response.json()
+ try {
+ handleError(data)
+ } catch (err) {
+ subscriber.error(err)
+ return
}
- subscriber.error(error)
- subscriber.complete()
- return
}
      // There could be an overridden stream parameter in the model
// that is set in request body (transformed payload)
@@ -54,9 +53,10 @@ export function requestInference(
model.parameters?.stream === false
) {
const data = await response.json()
- if (data.error || data.message) {
- subscriber.error(data.error ?? data)
- subscriber.complete()
+ try {
+ handleError(data)
+ } catch (err) {
+ subscriber.error(err)
return
}
if (transformResponse) {
@@ -91,13 +91,10 @@ export function requestInference(
const toParse = cachedLines + line
if (!line.includes('data: [DONE]')) {
const data = JSON.parse(toParse.replace('data: ', ''))
- if (
- 'error' in data ||
- 'message' in data ||
- 'detail' in data
- ) {
- subscriber.error(data.error ?? data)
- subscriber.complete()
+ try {
+ handleError(data)
+ } catch (err) {
+ subscriber.error(err)
return
}
content += data.choices[0]?.delta?.content ?? ''
@@ -118,3 +115,18 @@ export function requestInference(
.catch((err) => subscriber.error(err))
})
}
+
+/**
+ * Handle error and normalize it to a common format.
+ * @param data
+ */
+const handleError = (data: any) => {
+ if (
+ data.error ||
+ data.message ||
+ data.detail ||
+ (Array.isArray(data) && data.length && data[0].error)
+ ) {
+ throw data.error ?? data[0]?.error ?? data
+ }
+}
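
Note: `handleError` centralizes the error-shape checks that were previously duplicated across the three response paths in this file. Illustrative inputs (values made up) and how the helper treats each:

// Each line is an independent illustration of handleError's behavior.
handleError({ error: { message: 'rate limited' } }) // throws the inner error object
handleError({ message: 'Bad request' }) // throws the payload itself
handleError({ detail: 'Not found' }) // throws the payload itself
handleError([{ error: { message: 'upstream failed' } }]) // throws the first element's error
handleError({ choices: [] }) // returns normally: not an error shape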
diff --git a/core/src/browser/extensions/hardwareManagement.ts b/core/src/browser/extensions/hardwareManagement.ts
new file mode 100644
index 000000000..1f7c36287
--- /dev/null
+++ b/core/src/browser/extensions/hardwareManagement.ts
@@ -0,0 +1,26 @@
+import { HardwareInformation } from '../../types'
+import { BaseExtension, ExtensionTypeEnum } from '../extension'
+
+/**
+ * Hardware management extension. Provides system hardware information and GPU activation control.
+ * @abstract
+ * @extends BaseExtension
+ */
+export abstract class HardwareManagementExtension extends BaseExtension {
+ type(): ExtensionTypeEnum | undefined {
+ return ExtensionTypeEnum.Hardware
+ }
+
+ /**
+   * @returns A Promise that resolves to the system hardware information.
+ */
+  abstract getHardware(): Promise<HardwareInformation>
+
+ /**
+   * @returns A Promise that resolves with the result of activating the selected GPUs.
+ */
+  abstract setActiveGpu(data: { gpus: number[] }): Promise<{
+ message: string
+ activated_gpus: number[]
+ }>
+}
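
Note: a minimal sketch of a concrete subclass, assuming a local hardware API; the endpoint URLs are assumptions for illustration (not part of this diff), and BaseExtension lifecycle members are omitted:

// Hypothetical implementation sketch; endpoints are assumed, not from this diff.
class LocalHardwareExtension extends HardwareManagementExtension {
  async getHardware(): Promise<HardwareInformation> {
    const res = await fetch('http://127.0.0.1:3928/v1/hardware') // assumed endpoint
    return res.json()
  }

  async setActiveGpu(data: { gpus: number[] }) {
    const res = await fetch('http://127.0.0.1:3928/v1/hardware/activate', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(data),
    }) // assumed endpoint
    return res.json()
  }
}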
diff --git a/core/src/browser/extensions/index.test.ts b/core/src/browser/extensions/index.test.ts
index 26cbda8c5..bc5a7c358 100644
--- a/core/src/browser/extensions/index.test.ts
+++ b/core/src/browser/extensions/index.test.ts
@@ -1,6 +1,5 @@
import { ConversationalExtension } from './index';
import { InferenceExtension } from './index';
-import { MonitoringExtension } from './index';
import { AssistantExtension } from './index';
import { ModelExtension } from './index';
import * as Engines from './index';
@@ -14,10 +13,6 @@ describe('index.ts exports', () => {
expect(InferenceExtension).toBeDefined();
});
- test('should export MonitoringExtension', () => {
- expect(MonitoringExtension).toBeDefined();
- });
-
test('should export AssistantExtension', () => {
expect(AssistantExtension).toBeDefined();
});
@@ -29,4 +24,4 @@ describe('index.ts exports', () => {
test('should export Engines', () => {
expect(Engines).toBeDefined();
});
-});
\ No newline at end of file
+});
diff --git a/core/src/browser/extensions/index.ts b/core/src/browser/extensions/index.ts
index 9dbfe1afe..f11c7b09f 100644
--- a/core/src/browser/extensions/index.ts
+++ b/core/src/browser/extensions/index.ts
@@ -9,10 +9,7 @@ export { ConversationalExtension } from './conversational'
*/
export { InferenceExtension } from './inference'
-/**
- * Monitoring extension for system monitoring.
- */
-export { MonitoringExtension } from './monitoring'
+
/**
* Assistant extension for managing assistants.
@@ -33,3 +30,8 @@ export * from './engines'
* Engines Management
*/
export * from './enginesManagement'
+
+/**
+ * Hardware Management
+ */
+export * from './hardwareManagement'
diff --git a/core/src/browser/extensions/monitoring.test.ts b/core/src/browser/extensions/monitoring.test.ts
deleted file mode 100644
index 9bba89a8c..000000000
--- a/core/src/browser/extensions/monitoring.test.ts
+++ /dev/null
@@ -1,42 +0,0 @@
-
-import { ExtensionTypeEnum } from '../extension';
-import { MonitoringExtension } from './monitoring';
-
-it('should have the correct type', () => {
-  class TestMonitoringExtension extends MonitoringExtension {
-    getGpuSetting(): Promise<any> {
-      throw new Error('Method not implemented.');
-    }
-    getResourcesInfo(): Promise<any> {
-      throw new Error('Method not implemented.');
-    }
-    getCurrentLoad(): Promise<any> {
-      throw new Error('Method not implemented.');
-    }
-    getOsInfo(): Promise<any> {
-      throw new Error('Method not implemented.');
-    }
-  }
- const monitoringExtension = new TestMonitoringExtension();
- expect(monitoringExtension.type()).toBe(ExtensionTypeEnum.SystemMonitoring);
-});
-
-
-it('should create an instance of MonitoringExtension', () => {
-  class TestMonitoringExtension extends MonitoringExtension {
-    getGpuSetting(): Promise<any> {
-      throw new Error('Method not implemented.');
-    }
-    getResourcesInfo(): Promise<any> {
-      throw new Error('Method not implemented.');
-    }
-    getCurrentLoad(): Promise<any> {
-      throw new Error('Method not implemented.');
-    }
-    getOsInfo(): Promise<any> {
-      throw new Error('Method not implemented.');
-    }
-  }
- const monitoringExtension = new TestMonitoringExtension();
- expect(monitoringExtension).toBeInstanceOf(MonitoringExtension);
-});
diff --git a/core/src/browser/extensions/monitoring.ts b/core/src/browser/extensions/monitoring.ts
deleted file mode 100644
index cb544b6b7..000000000
--- a/core/src/browser/extensions/monitoring.ts
+++ /dev/null
@@ -1,20 +0,0 @@
-import { BaseExtension, ExtensionTypeEnum } from '../extension'
-import { GpuSetting, MonitoringInterface, OperatingSystemInfo } from '../../types'
-
-/**
- * Monitoring extension for system monitoring.
- * @extends BaseExtension
- */
-export abstract class MonitoringExtension extends BaseExtension implements MonitoringInterface {
- /**
- * Monitoring extension type.
- */
- type(): ExtensionTypeEnum | undefined {
- return ExtensionTypeEnum.SystemMonitoring
- }
-
-  abstract getGpuSetting(): Promise<GpuSetting | undefined>
-  abstract getResourcesInfo(): Promise<any>
-  abstract getCurrentLoad(): Promise<any>
-  abstract getOsInfo(): Promise<OperatingSystemInfo>
-}
diff --git a/core/src/types/config/appConfigEntity.ts b/core/src/types/config/appConfigEntity.ts
index 1402aeca1..bd352d22f 100644
--- a/core/src/types/config/appConfigEntity.ts
+++ b/core/src/types/config/appConfigEntity.ts
@@ -1,4 +1,5 @@
export type AppConfiguration = {
data_folder: string
quick_ask: boolean
+ distinct_id?: string
}
diff --git a/core/src/types/engine/index.ts b/core/src/types/engine/index.ts
index 7c848a279..9a6beeeff 100644
--- a/core/src/types/engine/index.ts
+++ b/core/src/types/engine/index.ts
@@ -18,6 +18,7 @@ export type EngineMetadata = {
template?: string
}
}
+ explore_models_url?: string
}
export type EngineVariant = {
diff --git a/core/src/types/hardware/index.ts b/core/src/types/hardware/index.ts
new file mode 100644
index 000000000..d154a4417
--- /dev/null
+++ b/core/src/types/hardware/index.ts
@@ -0,0 +1,55 @@
+export type Cpu = {
+ arch: string
+ cores: number
+ instructions: string[]
+ model: string
+ usage: number
+}
+
+export type GpuAdditionalInformation = {
+ compute_cap: string
+ driver_version: string
+}
+
+export type Gpu = {
+ activated: boolean
+ additional_information?: GpuAdditionalInformation
+ free_vram: number
+ id: string
+ name: string
+ total_vram: number
+ uuid: string
+ version: string
+}
+
+export type Os = {
+ name: string
+ version: string
+}
+
+export type Power = {
+ battery_life: number
+ charging_status: string
+ is_power_saving: boolean
+}
+
+export type Ram = {
+ available: number
+ total: number
+ type: string
+}
+
+export type Storage = {
+ available: number
+ total: number
+ type: string
+}
+
+export type HardwareInformation = {
+ cpu: Cpu
+ gpus: Gpu[]
+ os: Os
+ power: Power
+ ram: Ram
+ storage: Storage
+}
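
Note: an illustrative `HardwareInformation` literal showing how these types compose (all values made up):

const example: HardwareInformation = {
  cpu: { arch: 'x86_64', cores: 8, instructions: ['avx2'], model: 'Example CPU', usage: 12.5 },
  gpus: [
    {
      activated: true,
      additional_information: { compute_cap: '8.6', driver_version: '550.54' },
      free_vram: 10240,
      id: '0',
      name: 'Example GPU',
      total_vram: 12288,
      uuid: 'GPU-00000000',
      version: '12.3',
    },
  ],
  os: { name: 'Linux', version: '6.8' },
  power: { battery_life: 100, charging_status: 'full', is_power_saving: false },
  ram: { available: 16384, total: 32768, type: 'DDR4' },
  storage: { available: 512000, total: 1024000, type: 'SSD' },
}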
diff --git a/core/src/types/index.test.ts b/core/src/types/index.test.ts
index 9dc001c4d..d938feee9 100644
--- a/core/src/types/index.test.ts
+++ b/core/src/types/index.test.ts
@@ -4,7 +4,6 @@ import * as model from './model';
import * as thread from './thread';
import * as message from './message';
import * as inference from './inference';
-import * as monitoring from './monitoring';
import * as file from './file';
import * as config from './config';
import * as huggingface from './huggingface';
@@ -18,7 +17,6 @@ import * as setting from './setting';
expect(thread).toBeDefined();
expect(message).toBeDefined();
expect(inference).toBeDefined();
- expect(monitoring).toBeDefined();
expect(file).toBeDefined();
expect(config).toBeDefined();
expect(huggingface).toBeDefined();
diff --git a/core/src/types/index.ts b/core/src/types/index.ts
index e30dd18c3..3d262a6b7 100644
--- a/core/src/types/index.ts
+++ b/core/src/types/index.ts
@@ -3,7 +3,6 @@ export * from './model'
export * from './thread'
export * from './message'
export * from './inference'
-export * from './monitoring'
export * from './file'
export * from './config'
export * from './huggingface'
@@ -11,3 +10,4 @@ export * from './miscellaneous'
export * from './api'
export * from './setting'
export * from './engine'
+export * from './hardware'
diff --git a/core/src/types/miscellaneous/systemResourceInfo.ts b/core/src/types/miscellaneous/systemResourceInfo.ts
index 82db5d941..9361b79b6 100644
--- a/core/src/types/miscellaneous/systemResourceInfo.ts
+++ b/core/src/types/miscellaneous/systemResourceInfo.ts
@@ -1,33 +1,25 @@
+import { GpuAdditionalInformation } from '../hardware'
+
export type SystemResourceInfo = {
memAvailable: number
}
-export type RunMode = 'cpu' | 'gpu'
-
export type GpuSetting = {
- notify: boolean
- run_mode: RunMode
- nvidia_driver: {
- exist: boolean
- version: string
- }
- cuda: {
- exist: boolean
- version: string
- }
gpus: GpuSettingInfo[]
- gpu_highest_vram: string
- gpus_in_use: string[]
- is_initial: boolean
// TODO: This needs to be set based on user toggle in settings
vulkan: boolean
+ cpu?: any
}
export type GpuSettingInfo = {
+ activated: boolean
+ free_vram: number
id: string
- vram: string
name: string
- arch?: string
+ total_vram: number
+ uuid: string
+ version: string
+ additional_information?: GpuAdditionalInformation
}
export type SystemInformation = {
@@ -42,9 +34,6 @@ export type SupportedPlatform = SupportedPlatformTuple[number]
export type OperatingSystemInfo = {
platform: SupportedPlatform | 'unknown'
arch: string
- release: string
- machine: string
- version: string
totalMem: number
freeMem: number
}
diff --git a/core/src/types/model/modelEntity.ts b/core/src/types/model/modelEntity.ts
index 482dfa1ac..6e47c9ae4 100644
--- a/core/src/types/model/modelEntity.ts
+++ b/core/src/types/model/modelEntity.ts
@@ -71,7 +71,7 @@ export type Model = {
/**
* The model identifier, modern version of id.
*/
- mode?: string
+ model?: string
/**
* Human-readable name that is used for UI.
@@ -150,6 +150,7 @@ export type ModelSettingParams = {
*/
export type ModelRuntimeParams = {
temperature?: number
+ max_temperature?: number
token_limit?: number
top_k?: number
top_p?: number
diff --git a/core/src/types/monitoring/index.test.ts b/core/src/types/monitoring/index.test.ts
deleted file mode 100644
index 56c5879e4..000000000
--- a/core/src/types/monitoring/index.test.ts
+++ /dev/null
@@ -1,13 +0,0 @@
-import * as monitoringInterface from './monitoringInterface'
-import * as resourceInfo from './resourceInfo'
-
-import * as index from './index'
-
-it('should re-export all symbols from monitoringInterface and resourceInfo', () => {
- for (const key in monitoringInterface) {
- expect(index[key]).toBe(monitoringInterface[key])
- }
- for (const key in resourceInfo) {
- expect(index[key]).toBe(resourceInfo[key])
- }
-})
diff --git a/core/src/types/monitoring/index.ts b/core/src/types/monitoring/index.ts
deleted file mode 100644
index b96c518fd..000000000
--- a/core/src/types/monitoring/index.ts
+++ /dev/null
@@ -1,2 +0,0 @@
-export * from './monitoringInterface'
-export * from './resourceInfo'
diff --git a/core/src/types/monitoring/monitoringInterface.ts b/core/src/types/monitoring/monitoringInterface.ts
deleted file mode 100644
index 5ab1394a1..000000000
--- a/core/src/types/monitoring/monitoringInterface.ts
+++ /dev/null
@@ -1,29 +0,0 @@
-import { GpuSetting, OperatingSystemInfo } from '../miscellaneous'
-
-/**
- * Monitoring extension for system monitoring.
- * @extends BaseExtension
- */
-export interface MonitoringInterface {
- /**
- * Returns information about the system resources.
-   * @returns {Promise<any>} A promise that resolves with the system resources information.
-   */
-  getResourcesInfo(): Promise<any>
-
-  /**
-   * Returns the current system load.
-   * @returns {Promise<any>} A promise that resolves with the current system load.
-   */
-  getCurrentLoad(): Promise<any>
-
-  /**
-   * Returns the GPU configuration.
-   */
-  getGpuSetting(): Promise<GpuSetting | undefined>
-
-  /**
-   * Returns information about the operating system.
-   */
-  getOsInfo(): Promise<OperatingSystemInfo>
-}
diff --git a/core/src/types/monitoring/resourceInfo.ts b/core/src/types/monitoring/resourceInfo.ts
deleted file mode 100644
index b19da5462..000000000
--- a/core/src/types/monitoring/resourceInfo.ts
+++ /dev/null
@@ -1,6 +0,0 @@
-export type ResourceInfo = {
- mem: {
- totalMemory: number
- usedMemory: number
- }
-}
diff --git a/docs/src/pages/docs/configure-extensions.mdx b/docs/src/pages/docs/configure-extensions.mdx
index c72d8c6a5..71d226554 100644
--- a/docs/src/pages/docs/configure-extensions.mdx
+++ b/docs/src/pages/docs/configure-extensions.mdx
@@ -55,7 +55,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Conversational",
"version": "1.0.0",
"main": "dist/index.js",
- "description": "This extension enables conversations and state persistence via your filesystem",
+ "description": "This extension enables conversations and state persistence via your filesystem.",
"url": "extension://@janhq/conversational-extension/dist/index.js"
},
"@janhq/inference-anthropic-extension": {
@@ -70,7 +70,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Anthropic Inference Engine",
"version": "1.0.2",
"main": "dist/index.js",
- "description": "This extension enables Anthropic chat completion API calls",
+ "description": "This extension enables Anthropic chat completion API calls.",
"url": "extension://@janhq/inference-anthropic-extension/dist/index.js"
},
"@janhq/inference-triton-trt-llm-extension": {
@@ -85,7 +85,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Triton-TRT-LLM Inference Engine",
"version": "1.0.0",
"main": "dist/index.js",
- "description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option",
+ "description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option.",
"url": "extension://@janhq/inference-triton-trt-llm-extension/dist/index.js"
},
"@janhq/inference-mistral-extension": {
@@ -100,7 +100,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "MistralAI Inference Engine",
"version": "1.0.1",
"main": "dist/index.js",
- "description": "This extension enables Mistral chat completion API calls",
+ "description": "This extension enables Mistral chat completion API calls.",
"url": "extension://@janhq/inference-mistral-extension/dist/index.js"
},
"@janhq/inference-martian-extension": {
@@ -115,7 +115,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Martian Inference Engine",
"version": "1.0.1",
"main": "dist/index.js",
- "description": "This extension enables Martian chat completion API calls",
+ "description": "This extension enables Martian chat completion API calls.",
"url": "extension://@janhq/inference-martian-extension/dist/index.js"
},
"@janhq/inference-openrouter-extension": {
@@ -130,7 +130,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "OpenRouter Inference Engine",
"version": "1.0.0",
"main": "dist/index.js",
- "description": "This extension enables Open Router chat completion API calls",
+ "description": "This extension enables Open Router chat completion API calls.",
"url": "extension://@janhq/inference-openrouter-extension/dist/index.js"
},
"@janhq/inference-nvidia-extension": {
@@ -145,7 +145,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "NVIDIA NIM Inference Engine",
"version": "1.0.1",
"main": "dist/index.js",
- "description": "This extension enables NVIDIA chat completion API calls",
+ "description": "This extension enables NVIDIA chat completion API calls.",
"url": "extension://@janhq/inference-nvidia-extension/dist/index.js"
},
"@janhq/inference-groq-extension": {
@@ -160,7 +160,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Groq Inference Engine",
"version": "1.0.1",
"main": "dist/index.js",
- "description": "This extension enables fast Groq chat completion API calls",
+ "description": "This extension enables fast Groq chat completion API calls.",
"url": "extension://@janhq/inference-groq-extension/dist/index.js"
},
"@janhq/inference-openai-extension": {
@@ -175,7 +175,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "OpenAI Inference Engine",
"version": "1.0.2",
"main": "dist/index.js",
- "description": "This extension enables OpenAI chat completion API calls",
+ "description": "This extension enables OpenAI chat completion API calls.",
"url": "extension://@janhq/inference-openai-extension/dist/index.js"
},
"@janhq/inference-cohere-extension": {
@@ -190,7 +190,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Cohere Inference Engine",
"version": "1.0.0",
"main": "dist/index.js",
- "description": "This extension enables Cohere chat completion API calls",
+ "description": "This extension enables Cohere chat completion API calls.",
"url": "extension://@janhq/inference-cohere-extension/dist/index.js"
},
"@janhq/model-extension": {
@@ -205,7 +205,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Model Management",
"version": "1.0.33",
"main": "dist/index.js",
- "description": "Model Management Extension provides model exploration and seamless downloads",
+ "description": "Model Management Extension provides model exploration and seamless downloads.",
"url": "extension://@janhq/model-extension/dist/index.js"
},
"@janhq/monitoring-extension": {
@@ -220,7 +220,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "System Monitoring",
"version": "1.0.10",
"main": "dist/index.js",
- "description": "This extension provides system health and OS level data",
+ "description": "This extension provides system health and OS level data.",
"url": "extension://@janhq/monitoring-extension/dist/index.js"
},
"@janhq/assistant-extension": {
@@ -235,7 +235,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Jan Assistant",
"version": "1.0.1",
"main": "dist/index.js",
- "description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models",
+ "description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models.",
"url": "extension://@janhq/assistant-extension/dist/index.js"
},
"@janhq/tensorrt-llm-extension": {
diff --git a/docs/src/pages/docs/install-engines.mdx b/docs/src/pages/docs/install-engines.mdx
index 4e99d337d..cded71c83 100644
--- a/docs/src/pages/docs/install-engines.mdx
+++ b/docs/src/pages/docs/install-engines.mdx
@@ -47,8 +47,8 @@ To add a new remote engine:
|-------|-------------|----------|
| Engine Name | Name for your engine (e.g., "OpenAI", "Claude") | ✓ |
| API URL | The base URL of the provider's API | ✓ |
-| API Key | Your authentication key from the provider | ✓ |
-| Model List URL | URL for fetching available models | |
+| API Key | Your authentication key to activate this engine | ✓ |
+| Model List URL | The endpoint URL to fetch available models | |
| API Key Template | Custom authorization header format | |
| Request Format Conversion | Function to convert Jan's request format to provider's format | |
| Response Format Conversion | Function to convert provider's response format to Jan's format | |
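
Note: these fields correspond to the engine resource JSON bundled with the engine-management extension (compare resources/anthropic.json later in this diff). A hedged sketch of a minimal remote-engine definition; every name and URL below is an illustrative placeholder:

// Illustrative shape only, using fields visible in this diff's engine resources.
const customEngine = {
  id: 'my-provider', // placeholder
  type: 'remote',
  engine: 'my_provider', // placeholder
  url: 'https://provider.example.com/keys', // placeholder
  transform_req: {
    chat_completions: {
      url: 'https://api.provider.example.com/v1/chat/completions', // placeholder
    },
  },
}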
diff --git a/electron/main.ts b/electron/main.ts
index 6ce7f476a..42d16bb74 100644
--- a/electron/main.ts
+++ b/electron/main.ts
@@ -28,6 +28,7 @@ import { setupReactDevTool } from './utils/dev'
import { trayManager } from './managers/tray'
import { logSystemInfo } from './utils/system'
import { registerGlobalShortcuts } from './utils/shortcut'
+import { registerLogger } from './utils/logger'
const preloadPath = join(__dirname, 'preload.js')
const rendererPath = join(__dirname, '..', 'renderer')
@@ -79,6 +80,7 @@ app
})
.then(setupCore)
.then(createUserSpace)
+ .then(registerLogger)
.then(migrate)
.then(setupExtensions)
.then(setupMenu)
diff --git a/electron/package.json b/electron/package.json
index 700f009a5..59761623c 100644
--- a/electron/package.json
+++ b/electron/package.json
@@ -1,6 +1,6 @@
{
"name": "jan",
- "version": "0.1.4",
+ "version": "0.1.1737985524",
"main": "./build/main.js",
"author": "Jan ",
"license": "MIT",
diff --git a/extensions/monitoring-extension/src/node/logger.ts b/electron/utils/logger.ts
similarity index 77%
rename from extensions/monitoring-extension/src/node/logger.ts
rename to electron/utils/logger.ts
index ca64ea2d9..48af0b93a 100644
--- a/extensions/monitoring-extension/src/node/logger.ts
+++ b/electron/utils/logger.ts
@@ -1,16 +1,28 @@
-import fs from 'fs'
+import {
+ createWriteStream,
+ existsSync,
+ mkdirSync,
+ readdir,
+ stat,
+ unlink,
+ writeFileSync,
+} from 'fs'
import util from 'util'
import {
getAppConfigurations,
getJanDataFolderPath,
Logger,
+ LoggerManager,
} from '@janhq/core/node'
import path, { join } from 'path'
-export class FileLogger extends Logger {
+/**
+ * File Logger
+ */
+export class FileLogger implements Logger {
name = 'file'
logCleaningInterval: number = 120000
- timeout: NodeJS.Timeout | null = null
+ timeout: NodeJS.Timeout | undefined
appLogPath: string = './'
logEnabled: boolean = true
@@ -18,14 +30,13 @@ export class FileLogger extends Logger {
logEnabled: boolean = true,
logCleaningInterval: number = 120000
) {
- super()
this.logEnabled = logEnabled
if (logCleaningInterval) this.logCleaningInterval = logCleaningInterval
const appConfigurations = getAppConfigurations()
const logFolderPath = join(appConfigurations.data_folder, 'logs')
- if (!fs.existsSync(logFolderPath)) {
- fs.mkdirSync(logFolderPath, { recursive: true })
+ if (!existsSync(logFolderPath)) {
+ mkdirSync(logFolderPath, { recursive: true })
}
this.appLogPath = join(logFolderPath, 'app.log')
@@ -69,8 +80,8 @@ export class FileLogger extends Logger {
const logDirectory = path.join(getJanDataFolderPath(), 'logs')
// Perform log cleaning
const currentDate = new Date()
- if (fs.existsSync(logDirectory))
- fs.readdir(logDirectory, (err, files) => {
+ if (existsSync(logDirectory))
+ readdir(logDirectory, (err, files) => {
if (err) {
console.error('Error reading log directory:', err)
return
@@ -78,7 +89,7 @@ export class FileLogger extends Logger {
files.forEach((file) => {
const filePath = path.join(logDirectory, file)
- fs.stat(filePath, (err, stats) => {
+ stat(filePath, (err, stats) => {
if (err) {
console.error('Error getting file stats:', err)
return
@@ -86,7 +97,7 @@ export class FileLogger extends Logger {
// Check size
if (stats.size > size) {
- fs.unlink(filePath, (err) => {
+ unlink(filePath, (err) => {
if (err) {
console.error('Error deleting log file:', err)
return
@@ -103,7 +114,7 @@ export class FileLogger extends Logger {
(1000 * 3600 * 24)
)
if (daysDifference > days) {
- fs.unlink(filePath, (err) => {
+ unlink(filePath, (err) => {
if (err) {
console.error('Error deleting log file:', err)
return
@@ -124,15 +135,20 @@ export class FileLogger extends Logger {
}
}
+/**
+ * Write log function implementation
+ * @param message
+ * @param logPath
+ */
const writeLog = (message: string, logPath: string) => {
- if (!fs.existsSync(logPath)) {
+ if (!existsSync(logPath)) {
const logDirectory = path.join(getJanDataFolderPath(), 'logs')
- if (!fs.existsSync(logDirectory)) {
- fs.mkdirSync(logDirectory)
+ if (!existsSync(logDirectory)) {
+ mkdirSync(logDirectory)
}
- fs.writeFileSync(logPath, message)
+ writeFileSync(logPath, message)
} else {
- const logFile = fs.createWriteStream(logPath, {
+ const logFile = createWriteStream(logPath, {
flags: 'a',
})
logFile.write(util.format(message) + '\n')
@@ -140,3 +156,12 @@ const writeLog = (message: string, logPath: string) => {
console.debug(message)
}
}
+
+/**
+ * Register logger for global application logging
+ */
+export const registerLogger = () => {
+ const logger = new FileLogger()
+ LoggerManager.instance().register(logger)
+ logger.cleanLogs()
+}
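
Note: `registerLogger` is wired into the Electron startup chain in electron/main.ts above. A sketch of registering an additional logger alongside the file logger, assuming (not shown in this diff) that the core Logger contract is a `name` plus a `log` method and that LoggerManager fans calls out to every registered logger:

// Sketch under the stated assumptions about the Logger contract.
import { Logger, LoggerManager } from '@janhq/core/node'

class ConsoleLogger implements Logger {
  name = 'console'
  log(args: any) {
    console.log(args)
  }
}

LoggerManager.instance().register(new ConsoleLogger())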
diff --git a/extensions/engine-management-extension/engines.mjs b/extensions/engine-management-extension/engines.mjs
index e85035423..eafe8a09c 100644
--- a/extensions/engine-management-extension/engines.mjs
+++ b/extensions/engine-management-extension/engines.mjs
@@ -6,6 +6,8 @@ import groq from './resources/groq.json' with { type: 'json' }
import martian from './resources/martian.json' with { type: 'json' }
import mistral from './resources/mistral.json' with { type: 'json' }
import nvidia from './resources/nvidia.json' with { type: 'json' }
+import deepseek from './resources/deepseek.json' with { type: 'json' }
+import googleGemini from './resources/google_gemini.json' with { type: 'json' }
import anthropicModels from './models/anthropic.json' with { type: 'json' }
import cohereModels from './models/cohere.json' with { type: 'json' }
@@ -15,6 +17,8 @@ import groqModels from './models/groq.json' with { type: 'json' }
import martianModels from './models/martian.json' with { type: 'json' }
import mistralModels from './models/mistral.json' with { type: 'json' }
import nvidiaModels from './models/nvidia.json' with { type: 'json' }
+import deepseekModels from './models/deepseek.json' with { type: 'json' }
+import googleGeminiModels from './models/google_gemini.json' with { type: 'json' }
const engines = [
anthropic,
@@ -25,6 +29,8 @@ const engines = [
mistral,
martian,
nvidia,
+ deepseek,
+ googleGemini,
]
const models = [
...anthropicModels,
@@ -35,5 +41,7 @@ const models = [
...mistralModels,
...martianModels,
...nvidiaModels,
+ ...deepseekModels,
+ ...googleGeminiModels,
]
export { engines, models }
diff --git a/extensions/engine-management-extension/models/anthropic.json b/extensions/engine-management-extension/models/anthropic.json
index 46b5893d1..0212ce1da 100644
--- a/extensions/engine-management-extension/models/anthropic.json
+++ b/extensions/engine-management-extension/models/anthropic.json
@@ -8,6 +8,7 @@
"inference_params": {
"max_tokens": 4096,
"temperature": 0.7,
+ "max_temperature": 1.0,
"stream": true
},
"engine": "anthropic"
@@ -21,6 +22,7 @@
"inference_params": {
"max_tokens": 8192,
"temperature": 0.7,
+ "max_temperature": 1.0,
"stream": true
},
"engine": "anthropic"
@@ -34,6 +36,7 @@
"inference_params": {
"max_tokens": 8192,
"temperature": 0.7,
+ "max_temperature": 1.0,
"stream": true
},
"engine": "anthropic"
diff --git a/extensions/engine-management-extension/models/cohere.json b/extensions/engine-management-extension/models/cohere.json
index 458e4278b..96a830637 100644
--- a/extensions/engine-management-extension/models/cohere.json
+++ b/extensions/engine-management-extension/models/cohere.json
@@ -8,6 +8,7 @@
"inference_params": {
"max_tokens": 4096,
"temperature": 0.7,
+ "max_temperature": 1.0,
"stream": false
},
"engine": "cohere"
@@ -21,6 +22,7 @@
"inference_params": {
"max_tokens": 4096,
"temperature": 0.7,
+ "max_temperature": 1.0,
"stream": false
},
"engine": "cohere"
diff --git a/extensions/engine-management-extension/models/deepseek.json b/extensions/engine-management-extension/models/deepseek.json
new file mode 100644
index 000000000..29d5406bf
--- /dev/null
+++ b/extensions/engine-management-extension/models/deepseek.json
@@ -0,0 +1,28 @@
+[
+ {
+ "model": "deepseek-chat",
+ "object": "model",
+ "name": "DeepSeek Chat",
+ "version": "1.0",
+ "description": "The deepseek-chat model has been upgraded to DeepSeek-V3. deepseek-reasoner points to the new model DeepSeek-R1",
+ "inference_params": {
+ "max_tokens": 8192,
+ "temperature": 0.6,
+ "stream": true
+ },
+ "engine": "deepseek"
+ },
+ {
+ "model": "deepseek-reasoner",
+ "object": "model",
+ "name": "DeepSeek R1",
+ "version": "1.0",
+ "description": "CoT (Chain of Thought) is the reasoning content deepseek-reasoner gives before output the final answer. For details, please refer to Reasoning Model.",
+ "inference_params": {
+ "max_tokens": 8192,
+ "temperature": 0.6,
+ "stream": true
+ },
+ "engine": "deepseek"
+ }
+]
diff --git a/extensions/engine-management-extension/models/google_gemini.json b/extensions/engine-management-extension/models/google_gemini.json
new file mode 100644
index 000000000..392754ee6
--- /dev/null
+++ b/extensions/engine-management-extension/models/google_gemini.json
@@ -0,0 +1,67 @@
+[
+ {
+ "model": "gemini-2.0-flash",
+ "object": "model",
+ "name": "Gemini 2.0 Flash",
+ "version": "1.0",
+ "description": "Gemini 2.0 Flash delivers next-gen features and improved capabilities, including superior speed, native tool use, multimodal generation, and a 1M token context window.",
+ "inference_params": {
+ "max_tokens": 8192,
+ "temperature": 0.6,
+ "stream": true
+ },
+ "engine": "google_gemini"
+ },
+ {
+ "model": "gemini-2.0-flash-lite-preview",
+ "object": "model",
+ "name": "Gemini 2.0 Flash-Lite Preview",
+ "version": "1.0",
+ "description": "A Gemini 2.0 Flash model optimized for cost efficiency and low latency.",
+ "inference_params": {
+ "max_tokens": 8192,
+ "temperature": 0.6,
+ "stream": true
+ },
+ "engine": "google_gemini"
+ },
+ {
+ "model": "gemini-1.5-flash",
+ "object": "model",
+ "name": "Gemini 1.5 Flash",
+ "version": "1.0",
+ "description": "Gemini 1.5 Flash is a fast and versatile multimodal model for scaling across diverse tasks.",
+ "inference_params": {
+ "max_tokens": 8192,
+ "temperature": 0.6,
+ "stream": true
+ },
+ "engine": "google_gemini"
+ },
+ {
+ "model": "gemini-1.5-flash-8b",
+ "object": "model",
+ "name": "Gemini 1.5 Flash-8B",
+ "version": "1.0",
+ "description": "Gemini 1.5 Flash-8B is a small model designed for lower intelligence tasks.",
+ "inference_params": {
+ "max_tokens": 8192,
+ "temperature": 0.6,
+ "stream": true
+ },
+ "engine": "google_gemini"
+ },
+ {
+ "model": "gemini-1.5-pro",
+ "object": "model",
+ "name": "Gemini 1.5 Pro",
+ "version": "1.0",
+ "description": "Gemini 1.5 Pro is a mid-size multimodal model that is optimized for a wide-range of reasoning tasks. 1.5 Pro can process large amounts of data at once, including 2 hours of video, 19 hours of audio, codebases with 60,000 lines of code, or 2,000 pages of text. ",
+ "inference_params": {
+ "max_tokens": 8192,
+ "temperature": 0.6,
+ "stream": true
+ },
+ "engine": "google_gemini"
+ }
+]
diff --git a/extensions/engine-management-extension/models/mistral.json b/extensions/engine-management-extension/models/mistral.json
index 12fcf938d..47df5d506 100644
--- a/extensions/engine-management-extension/models/mistral.json
+++ b/extensions/engine-management-extension/models/mistral.json
@@ -8,6 +8,7 @@
"inference_params": {
"max_tokens": 32000,
"temperature": 0.7,
+ "max_temperature": 1.0,
"top_p": 0.95,
"stream": true
},
@@ -22,6 +23,7 @@
"inference_params": {
"max_tokens": 32000,
"temperature": 0.7,
+ "max_temperature": 1.0,
"top_p": 0.95,
"stream": true
},
@@ -36,6 +38,7 @@
"inference_params": {
"max_tokens": 32000,
"temperature": 0.7,
+ "max_temperature": 1.0,
"top_p": 0.95,
"stream": true
},
diff --git a/extensions/engine-management-extension/models/nvidia.json b/extensions/engine-management-extension/models/nvidia.json
index dfce9f8bc..cb6f9dec1 100644
--- a/extensions/engine-management-extension/models/nvidia.json
+++ b/extensions/engine-management-extension/models/nvidia.json
@@ -8,6 +8,7 @@
"inference_params": {
"max_tokens": 1024,
"temperature": 0.3,
+ "max_temperature": 1.0,
"top_p": 1,
"stream": false,
"frequency_penalty": 0,
diff --git a/extensions/engine-management-extension/models/openai.json b/extensions/engine-management-extension/models/openai.json
index 8f59b42ea..5c1e70b5a 100644
--- a/extensions/engine-management-extension/models/openai.json
+++ b/extensions/engine-management-extension/models/openai.json
@@ -79,12 +79,7 @@
"description": "OpenAI o1 is a new model with complex reasoning",
"format": "api",
"inference_params": {
- "max_tokens": 100000,
- "temperature": 1,
- "top_p": 1,
- "stream": true,
- "frequency_penalty": 0,
- "presence_penalty": 0
+ "max_tokens": 100000
},
"engine": "openai"
},
@@ -97,11 +92,7 @@
"format": "api",
"inference_params": {
"max_tokens": 32768,
- "temperature": 1,
- "top_p": 1,
- "stream": true,
- "frequency_penalty": 0,
- "presence_penalty": 0
+ "stream": true
},
"engine": "openai"
},
@@ -114,11 +105,20 @@
"format": "api",
"inference_params": {
"max_tokens": 65536,
- "temperature": 1,
- "top_p": 1,
- "stream": true,
- "frequency_penalty": 0,
- "presence_penalty": 0
+ "stream": true
+ },
+ "engine": "openai"
+ },
+ {
+ "model": "o3-mini",
+ "object": "model",
+ "name": "OpenAI o3-mini",
+ "version": "1.0",
+ "description": "OpenAI most recent reasoning model, providing high intelligence at the same cost and latency targets of o1-mini.",
+ "format": "api",
+ "inference_params": {
+ "max_tokens": 100000,
+ "stream": true
},
"engine": "openai"
}
diff --git a/extensions/engine-management-extension/models/openrouter.json b/extensions/engine-management-extension/models/openrouter.json
index b9714bb57..bf132533c 100644
--- a/extensions/engine-management-extension/models/openrouter.json
+++ b/extensions/engine-management-extension/models/openrouter.json
@@ -1,16 +1,91 @@
[
{
- "model": "open-router-auto",
+ "model": "deepseek/deepseek-r1:free",
"object": "model",
- "name": "OpenRouter",
+ "name": "DeepSeek: R1",
"version": "1.0",
- "description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+ "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
- "max_tokens": 128000,
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
- "presence_penalty": 0
+ "presence_penalty": 0,
+ "stream": true
+ },
+ "engine": "openrouter"
+ },
+ {
+ "model": "deepseek/deepseek-r1-distill-llama-70b:free",
+ "object": "model",
+ "name": "DeepSeek: R1 Distill Llama 70B",
+ "version": "1.0",
+ "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+ "inference_params": {
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "frequency_penalty": 0,
+ "presence_penalty": 0,
+ "stream": true
+ },
+ "engine": "openrouter"
+ },
+ {
+ "model": "meta-llama/llama-3.1-405b-instruct:free",
+ "object": "model",
+ "name": "Meta: Llama 3.1 405B Instruct",
+ "version": "1.0",
+ "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+ "inference_params": {
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "frequency_penalty": 0,
+ "presence_penalty": 0,
+ "stream": true
+ },
+ "engine": "openrouter"
+ },
+ {
+ "model": "qwen/qwen-vl-plus:free",
+ "object": "model",
+ "name": "Qwen: Qwen VL Plus",
+ "version": "1.0",
+ "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+ "inference_params": {
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "frequency_penalty": 0,
+ "presence_penalty": 0,
+ "stream": true
+ },
+ "engine": "openrouter"
+ },
+ {
+ "model": "qwen/qwen2.5-vl-72b-instruct:free",
+ "object": "model",
+ "name": "Qwen: Qwen2.5 VL 72B Instruct",
+ "version": "1.0",
+ "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+ "inference_params": {
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "frequency_penalty": 0,
+ "presence_penalty": 0,
+ "stream": true
},
"engine": "openrouter"
}
diff --git a/extensions/engine-management-extension/package.json b/extensions/engine-management-extension/package.json
index 96f962ffd..571a3852b 100644
--- a/extensions/engine-management-extension/package.json
+++ b/extensions/engine-management-extension/package.json
@@ -29,12 +29,10 @@
},
"dependencies": {
"@janhq/core": "../../core/package.tgz",
- "cpu-instructions": "^0.0.13",
"ky": "^1.7.2",
"p-queue": "^8.0.1"
},
"bundledDependencies": [
- "cpu-instructions",
"@janhq/core"
],
"engines": {
diff --git a/extensions/engine-management-extension/resources/anthropic.json b/extensions/engine-management-extension/resources/anthropic.json
index 12a3f08b8..4172bcd0b 100644
--- a/extensions/engine-management-extension/resources/anthropic.json
+++ b/extensions/engine-management-extension/resources/anthropic.json
@@ -1,5 +1,5 @@
{
- "id": "@janhq/inference-anthropic-extension",
+ "id": "anthropic",
"type": "remote",
"engine": "anthropic",
"url": "https://console.anthropic.com/settings/keys",
@@ -10,13 +10,14 @@
"transform_req": {
"chat_completions": {
"url": "https://api.anthropic.com/v1/messages",
- "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": \"{{ input_request.messages.0.content }}\", \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": \"{{ message.role}}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
+ "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": {{ tojson(input_request.messages.0.content) }}, \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"metadata\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
- "template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": \"{{ input_request.model }}\", \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"{{ input_request.role }}\", \"content\": {% if input_request.content and input_request.content.0.type == \"text\" %} \"{{input_request.content.0.text}}\" {% else %} null {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.stop_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.usage.input_tokens }}, \"completion_tokens\": {{ input_request.usage.output_tokens }}, \"total_tokens\": {{ input_request.usage.input_tokens + input_request.usage.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 }, \"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}"
+ "template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {{tojson(input_request)}} {% endif %}"
}
- }
+ },
+ "explore_models_url": "https://docs.anthropic.com/en/docs/about-claude/models"
}
}
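Two things change in the Anthropic templates above: values are now serialized with `tojson(...)` instead of being hand-quoted, and empty message content falls back to "." because the Anthropic API rejects empty content blocks. A minimal TypeScript sketch of the escaping problem the `tojson` switch fixes (variable names are illustrative, not the extension's code):

    // Hand-quoting raw text produces invalid JSON as soon as the value
    // contains quotes or newlines; serializing it escapes them correctly.
    const content = 'She said "hi"\nsecond line'
    const naive = `{ "content": "${content}" }` // unescaped " and newline: invalid JSON
    const safe = `{ "content": ${JSON.stringify(content)} }` // valid JSON
    console.log(JSON.parse(safe).content) // round-trips intact
    // JSON.parse(naive) throws a SyntaxError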
diff --git a/extensions/engine-management-extension/resources/cohere.json b/extensions/engine-management-extension/resources/cohere.json
index b10e00e5b..43cd0da5b 100644
--- a/extensions/engine-management-extension/resources/cohere.json
+++ b/extensions/engine-management-extension/resources/cohere.json
@@ -1,5 +1,5 @@
{
- "id": "@janhq/inference-cohere-extension",
+ "id": "cohere",
"type": "remote",
"engine": "cohere",
"url": "https://dashboard.cohere.com/api-keys",
@@ -10,13 +10,14 @@
"transform_req": {
"chat_completions": {
"url": "https://api.cohere.ai/v1/chat",
- "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"preamble\": \"{{ input_request.messages.0.content }}\", {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_first and not loop.is_last %} {\"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}], {% endif %} \"message\": \"{{ last(input_request.messages).content }}\" {% else %} {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_last %} { \"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}],{% endif %}\"message\": \"{{ last(input_request.messages).content }}\" {% endif %}{% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} \"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
+ "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"preamble\": {{ tojson(input_request.messages.0.content) }}, {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_first and not loop.is_last %} {\"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}], {% endif %} \"message\": \"{{ last(input_request.messages).content }}\" {% else %} {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_last %} { \"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}],{% endif %}\"message\": \"{{ last(input_request.messages).content }}\" {% endif %}{% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} \"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
"template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.event_type == \"text-generation\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.text }}\" {% else %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.event_type == \"stream-end\" %} \"finish_reason\": \"{{ input_request.finish_reason }}\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.generation_id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": {% if input_request.model %} \"{{ input_request.model }}\" {% else %} \"command-r-plus-08-2024\" {% endif %}, \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"assistant\", \"content\": {% if not input_request.text %} null {% else %} \"{{ input_request.text }}\" {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.finish_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.meta.tokens.input_tokens }}, \"completion_tokens\": {{ input_request.meta.tokens.output_tokens }},\"total_tokens\": {{ input_request.meta.tokens.input_tokens + input_request.meta.tokens.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 },\"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}"
}
- }
+ },
+ "explore_models_url": "https://docs.cohere.com/v2/docs/models"
}
}
diff --git a/extensions/engine-management-extension/resources/deepseek.json b/extensions/engine-management-extension/resources/deepseek.json
new file mode 100644
index 000000000..214ec3b23
--- /dev/null
+++ b/extensions/engine-management-extension/resources/deepseek.json
@@ -0,0 +1,23 @@
+{
+ "id": "deepseek",
+ "type": "remote",
+ "engine": "deepseek",
+ "url": "https://platform.deepseek.com/api_keys",
+ "api_key": "",
+ "metadata": {
+ "get_models_url": "https://api.deepseek.com/models",
+ "header_template": "Authorization: Bearer {{api_key}}",
+ "transform_req": {
+ "chat_completions": {
+ "url": "https://api.deepseek.com/chat/completions",
+ "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+ }
+ },
+ "transform_resp": {
+ "chat_completions": {
+ "template": "{{tojson(input_request)}}"
+ }
+ },
+ "explore_models_url": "https://api-docs.deepseek.com/quick_start/pricing"
+ }
+}
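The request template above iterates over `input_request` and copies only an allow-list of OpenAI-compatible keys, using a `first` flag for comma placement. A rough TypeScript equivalent of what the template computes (a sketch; the allow-list is abbreviated):

    // Sketch of the allow-list filtering the request template performs.
    const ALLOWED_KEYS = new Set([
      'messages', 'model', 'temperature', 'max_tokens', 'stream', 'top_p',
      'stop', 'seed', 'tools', 'tool_choice', 'user',
      // ...plus the remaining keys named in the template above
    ])

    function transformRequest(input: Record<string, unknown>): string {
      const filtered = Object.fromEntries(
        Object.entries(input).filter(([key]) => ALLOWED_KEYS.has(key))
      )
      // The template emits this JSON by hand, key by key.
      return JSON.stringify(filtered)
    }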
diff --git a/extensions/engine-management-extension/resources/google_gemini.json b/extensions/engine-management-extension/resources/google_gemini.json
new file mode 100644
index 000000000..e0fa809a5
--- /dev/null
+++ b/extensions/engine-management-extension/resources/google_gemini.json
@@ -0,0 +1,23 @@
+{
+ "id": "google_gemini",
+ "type": "remote",
+ "engine": "google_gemini",
+ "url": "https://aistudio.google.com/apikey",
+ "api_key": "",
+ "metadata": {
+ "get_models_url": "https://generativelanguage.googleapis.com/v1beta/models",
+ "header_template": "Authorization: Bearer {{api_key}}",
+ "transform_req": {
+ "chat_completions": {
+ "url": "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
+ "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+ }
+ },
+ "transform_resp": {
+ "chat_completions": {
+ "template": "{{tojson(input_request)}}"
+ }
+ },
+ "explore_models_url": "https://ai.google.dev/gemini-api/docs/models/gemini"
+ }
+}
diff --git a/extensions/engine-management-extension/resources/groq.json b/extensions/engine-management-extension/resources/groq.json
index 60d553a92..87d215ab2 100644
--- a/extensions/engine-management-extension/resources/groq.json
+++ b/extensions/engine-management-extension/resources/groq.json
@@ -1,5 +1,5 @@
{
- "id": "@janhq/inference-groq-extension",
+ "id": "groq",
"type": "remote",
"engine": "groq",
"url": "https://console.groq.com/keys",
@@ -15,8 +15,9 @@
},
"transform_resp": {
"chat_completions": {
- "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+ "template": "{{tojson(input_request)}}"
}
- }
+ },
+ "explore_models_url": "https://console.groq.com/docs/models"
}
}
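Groq's response payloads are already OpenAI-compatible, so the key-filtering response template collapses to a passthrough. In TypeScript terms the transform is now just identity serialization (a sketch):

    // `{{tojson(input_request)}}` re-serializes the upstream response unchanged.
    const transformResponse = (resp: unknown): string => JSON.stringify(resp)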
diff --git a/extensions/engine-management-extension/resources/martian.json b/extensions/engine-management-extension/resources/martian.json
index 3a65f3981..3fd458660 100644
--- a/extensions/engine-management-extension/resources/martian.json
+++ b/extensions/engine-management-extension/resources/martian.json
@@ -1,5 +1,5 @@
{
- "id": "@janhq/inference-martian-extension",
+ "id": "martian",
"type": "remote",
"engine": "martian",
"url": "https://withmartian.com/dashboard",
@@ -15,8 +15,9 @@
},
"transform_resp": {
"chat_completions": {
- "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+ "template": "{{tojson(input_request)}}"
}
- }
+ },
+ "explore_models_url": "https://withmartian.github.io/llm-adapters/"
}
}
diff --git a/extensions/engine-management-extension/resources/mistral.json b/extensions/engine-management-extension/resources/mistral.json
index 3f447dc4c..4a24471a2 100644
--- a/extensions/engine-management-extension/resources/mistral.json
+++ b/extensions/engine-management-extension/resources/mistral.json
@@ -1,5 +1,5 @@
{
- "id": "@janhq/inference-mistral-extension",
+ "id": "mistral",
"type": "remote",
"engine": "mistral",
"url": "https://console.mistral.ai/api-keys/",
@@ -17,6 +17,7 @@
"chat_completions": {
"template": "{{tojson(input_request)}}"
}
- }
+ },
+ "explore_models_url": "https://docs.mistral.ai/getting-started/models/models_overview/"
}
}
diff --git a/extensions/engine-management-extension/resources/nvidia.json b/extensions/engine-management-extension/resources/nvidia.json
index 240130090..573bad4f6 100644
--- a/extensions/engine-management-extension/resources/nvidia.json
+++ b/extensions/engine-management-extension/resources/nvidia.json
@@ -1,5 +1,5 @@
{
- "id": "@janhq/inference-nvidia-extension",
+ "id": "nvidia",
"type": "remote",
"engine": "nvidia",
"url": "https://org.ngc.nvidia.com/setup/personal-keys",
@@ -15,8 +15,9 @@
},
"transform_resp": {
"chat_completions": {
- "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+ "template": "{{tojson(input_request)}}"
}
- }
+ },
+ "explore_models_url": "https://build.nvidia.com/models"
}
}
diff --git a/extensions/engine-management-extension/resources/openai.json b/extensions/engine-management-extension/resources/openai.json
index 97effd42a..42f421072 100644
--- a/extensions/engine-management-extension/resources/openai.json
+++ b/extensions/engine-management-extension/resources/openai.json
@@ -1,5 +1,5 @@
{
- "id": "@janhq/inference-openai-extension",
+ "id": "openai",
"type": "remote",
"engine": "openai",
"url": "https://platform.openai.com/account/api-keys",
@@ -10,13 +10,14 @@
"transform_req": {
"chat_completions": {
"url": "https://api.openai.com/v1/chat/completions",
- "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or ((input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and (key == \"stop\")) %} {% if not first %} , {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %} , {% endif %} {% endif %} {% endfor %}] {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }"
+ "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or ((input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") and (key == \"stop\")) %} {% if not first %} , {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %} , {% endif %} {% endif %} {% endfor %}] {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% set first = false %} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
- "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+ "template": "{{tojson(input_request)}}"
}
- }
+ },
+ "explore_models_url": "https://platform.openai.com/docs/models"
}
}
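The request-template change above extends the reasoning-model special cases to o3 and o3-mini, and also sets the `first` flag on the `max_completion_tokens` branch so the comma bookkeeping no longer drops a separator there. The key remap the template implements, as a short sketch:

    // Reasoning models take max_completion_tokens instead of max_tokens;
    // every other model keeps the original key.
    const REASONING_MODELS = new Set(['o1', 'o1-preview', 'o1-mini', 'o3', 'o3-mini'])

    function remapKey(model: string, key: string): string {
      return key === 'max_tokens' && REASONING_MODELS.has(model)
        ? 'max_completion_tokens'
        : key
    }

    // remapKey('o3-mini', 'max_tokens') -> 'max_completion_tokens'
    // remapKey('gpt-4o', 'max_tokens') -> 'max_tokens'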
diff --git a/extensions/engine-management-extension/resources/openrouter.json b/extensions/engine-management-extension/resources/openrouter.json
index 45dc48414..798199708 100644
--- a/extensions/engine-management-extension/resources/openrouter.json
+++ b/extensions/engine-management-extension/resources/openrouter.json
@@ -1,5 +1,5 @@
{
- "id": "@janhq/inference-openrouter-extension",
+ "id": "openrouter",
"type": "remote",
"engine": "openrouter",
"url": "https://openrouter.ai/keys",
@@ -10,13 +10,14 @@
"transform_req": {
"chat_completions": {
"url": "https://openrouter.ai/api/v1/chat/completions",
- "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+ "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
- "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+ "template": "{{tojson(input_request)}}"
}
- }
+ },
+ "explore_models_url": "https://openrouter.ai/models"
}
}
diff --git a/extensions/engine-management-extension/rolldown.config.mjs b/extensions/engine-management-extension/rolldown.config.mjs
index 1290338db..d89fd9bfa 100644
--- a/extensions/engine-management-extension/rolldown.config.mjs
+++ b/extensions/engine-management-extension/rolldown.config.mjs
@@ -13,9 +13,19 @@ export default defineConfig([
NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`),
API_URL: JSON.stringify('http://127.0.0.1:39291'),
SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
+ PLATFORM: JSON.stringify(process.platform),
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'),
DEFAULT_REMOTE_ENGINES: JSON.stringify(engines),
DEFAULT_REMOTE_MODELS: JSON.stringify(models),
+ DEFAULT_REQUEST_PAYLOAD_TRANSFORM: JSON.stringify(
+ `{ {% set first = true %} {% for key, value in input_request %} {% if key == "messages" or key == "model" or key == "temperature" or key == "store" or key == "max_tokens" or key == "stream" or key == "presence_penalty" or key == "metadata" or key == "frequency_penalty" or key == "tools" or key == "tool_choice" or key == "logprobs" or key == "top_logprobs" or key == "logit_bias" or key == "n" or key == "modalities" or key == "prediction" or key == "response_format" or key == "service_tier" or key == "seed" or key == "stop" or key == "stream_options" or key == "top_p" or key == "parallel_tool_calls" or key == "user" %} {% if not first %},{% endif %} "{{ key }}": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }`
+ ),
+ DEFAULT_RESPONSE_BODY_TRANSFORM: JSON.stringify(
+ '{{tojson(input_request)}}'
+ ),
+ DEFAULT_REQUEST_HEADERS_TRANSFORM: JSON.stringify(
+ 'Authorization: Bearer {{api_key}}'
+ ),
},
},
{
@@ -29,15 +39,4 @@ export default defineConfig([
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'),
},
},
- {
- input: 'src/node/cpuInfo.ts',
- output: {
- format: 'cjs',
- file: 'dist/node/cpuInfo.js',
- },
- external: ['cpu-instructions'],
- resolve: {
- extensions: ['.ts', '.js', '.svg'],
- },
- },
])
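The new `PLATFORM` define is what lets the extension drop the forked cpuInfo child process below: rolldown's `define` substitutes the token at bundle time, so platform checks become literals baked into each build rather than runtime lookups. A small sketch of how such a constant is consumed:

    // Declared in global.d.ts; rolldown replaces every occurrence at build time.
    declare const PLATFORM: string

    export function isWindows(): boolean {
      // After bundling on Windows this compiles to ("win32" === 'win32').
      return PLATFORM === 'win32'
    }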
diff --git a/extensions/engine-management-extension/src/@types/global.d.ts b/extensions/engine-management-extension/src/@types/global.d.ts
index 2d520d5f9..55874ea9a 100644
--- a/extensions/engine-management-extension/src/@types/global.d.ts
+++ b/extensions/engine-management-extension/src/@types/global.d.ts
@@ -1,7 +1,11 @@
declare const API_URL: string
declare const CORTEX_ENGINE_VERSION: string
+declare const PLATFORM: string
declare const SOCKET_URL: string
declare const NODE: string
+declare const DEFAULT_REQUEST_PAYLOAD_TRANSFORM: string
+declare const DEFAULT_RESPONSE_BODY_TRANSFORM: string
+declare const DEFAULT_REQUEST_HEADERS_TRANSFORM: string
declare const DEFAULT_REMOTE_ENGINES: ({
id: string
diff --git a/extensions/engine-management-extension/src/index.ts b/extensions/engine-management-extension/src/index.ts
index 0d30bf4ea..e2730cc71 100644
--- a/extensions/engine-management-extension/src/index.ts
+++ b/extensions/engine-management-extension/src/index.ts
@@ -19,12 +19,16 @@ import ky, { HTTPError } from 'ky'
import PQueue from 'p-queue'
import { EngineError } from './error'
import { getJanDataFolderPath } from '@janhq/core'
+import { engineVariant } from './utils'
+interface ModelList {
+ data: Model[]
+}
/**
- * JSONEngineManagementExtension is a EngineManagementExtension implementation that provides
+ * JanEngineManagementExtension is an EngineManagementExtension implementation that provides
* functionality for managing engines.
*/
-export default class JSONEngineManagementExtension extends EngineManagementExtension {
+export default class JanEngineManagementExtension extends EngineManagementExtension {
queue = new PQueue({ concurrency: 1 })
/**
@@ -63,13 +67,12 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
* @returns A Promise that resolves to the list of remote models.
*/
async getRemoteModels(name: string): Promise<any> {
- return this.queue.add(() =>
- ky
- .get(`${API_URL}/v1/models/remote/${name}`)
- .json()
- .then((e) => e)
- .catch(() => [])
- ) as Promise<any>
+ return ky
+ .get(`${API_URL}/v1/models/remote/${name}`)
+ .json()
+ .catch(() => ({
+ data: [],
+ })) as Promise<ModelList>
}
/**
@@ -138,9 +141,38 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
* Add a new remote engine
* @returns A Promise that resolves when the engine is installed.
*/
- async addRemoteEngine(engineConfig: EngineConfig) {
+ async addRemoteEngine(
+ engineConfig: EngineConfig,
+ persistModels: boolean = true
+ ) {
+ // Populate default settings
+ if (
+ engineConfig.metadata?.transform_req?.chat_completions &&
+ !engineConfig.metadata.transform_req.chat_completions.template
+ )
+ engineConfig.metadata.transform_req.chat_completions.template =
+ DEFAULT_REQUEST_PAYLOAD_TRANSFORM
+
+ if (
+ engineConfig.metadata?.transform_resp?.chat_completions &&
+ !engineConfig.metadata.transform_resp.chat_completions?.template
+ )
+ engineConfig.metadata.transform_resp.chat_completions.template =
+ DEFAULT_RESPONSE_BODY_TRANSFORM
+
+ if (engineConfig.metadata && !engineConfig.metadata?.header_template)
+ engineConfig.metadata.header_template = DEFAULT_REQUEST_HEADERS_TRANSFORM
+
return this.queue.add(() =>
- ky.post(`${API_URL}/v1/engines`, { json: engineConfig }).then((e) => e)
+ ky.post(`${API_URL}/v1/engines`, { json: engineConfig }).then((e) => {
+ if (persistModels && engineConfig.metadata?.get_models_url) {
+ // Pull /models from remote models endpoint
+ return this.populateRemoteModels(engineConfig)
+ .then(() => e)
+ .catch(() => e)
+ }
+ return e
+ })
) as Promise<{ messages: string }>
}
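With the defaults above, an engine can now be registered from little more than its endpoints; any omitted template falls back to the bundled transforms. A hypothetical call (the instance, engine id, and URLs are invented for illustration, and the config is abbreviated):

    // Omitted templates are filled from DEFAULT_REQUEST_PAYLOAD_TRANSFORM,
    // DEFAULT_RESPONSE_BODY_TRANSFORM and DEFAULT_REQUEST_HEADERS_TRANSFORM
    // before the config is POSTed to /v1/engines.
    await extension.addRemoteEngine({
      engine: 'my-provider',
      type: 'remote',
      metadata: {
        get_models_url: 'https://api.example.com/v1/models',
        transform_req: {
          chat_completions: { url: 'https://api.example.com/v1/chat/completions' },
        },
        transform_resp: { chat_completions: {} },
        // header_template omitted -> "Authorization: Bearer {{api_key}}"
      },
    } as EngineConfig)
    // With persistModels left at its default of true, the extension then
    // pulls the provider's /models list and registers each entry.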
@@ -161,9 +193,25 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
* @param model - Remote model object.
*/
async addRemoteModel(model: Model) {
- return this.queue.add(() =>
- ky.post(`${API_URL}/v1/models/add`, { json: model }).then((e) => e)
- )
+ return this.queue
+ .add(() =>
+ ky
+ .post(`${API_URL}/v1/models/add`, {
+ json: {
+ inference_params: {
+ max_tokens: 4096,
+ temperature: 0.7,
+ top_p: 0.95,
+ stream: true,
+ frequency_penalty: 0,
+ presence_penalty: 0,
+ },
+ ...model,
+ },
+ })
+ .then((e) => e)
+ )
+ .then(() => {})
}
/**
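Note the spread order in the payload above: the default `inference_params` come first and `...model` second, so a caller-supplied `inference_params` object replaces the fallback wholesale rather than deep-merging with it. A sketch of that behavior:

    // Shallow spread: the later object wins per top-level key.
    const defaults = { inference_params: { max_tokens: 4096, temperature: 0.7 } }
    const model = { model: 'example-model', inference_params: { temperature: 0.2 } }
    const payload = { ...defaults, ...model }
    // payload.inference_params is { temperature: 0.2 }: the default
    // max_tokens is gone, so callers should send a complete parameter set.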
@@ -245,11 +293,7 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
error instanceof EngineError
) {
const systemInfo = await systemInformation()
- const variant = await executeOnMain(
- NODE,
- 'engineVariant',
- systemInfo.gpuSetting
- )
+ const variant = await engineVariant(systemInfo.gpuSetting)
await this.setDefaultEngineVariant(InferenceEngine.cortex_llamacpp, {
variant: variant,
version: `${CORTEX_ENGINE_VERSION}`,
@@ -293,14 +337,40 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
data.api_key = api_key
/// END - Migrate legacy api key settings
- await this.addRemoteEngine(data).catch(console.error)
+ await this.addRemoteEngine(data, false).catch(console.error)
})
)
events.emit(EngineEvent.OnEngineUpdate, {})
- DEFAULT_REMOTE_MODELS.forEach(async (data: Model) => {
- await this.addRemoteModel(data).catch(() => {})
- })
+ await Promise.all(
+ DEFAULT_REMOTE_MODELS.map((data: Model) =>
+ this.addRemoteModel(data).catch(() => {})
+ )
+ )
events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
}
}
+
+ /**
+ * Pulls the model list from the remote provider and persists it.
+ * @param engineConfig
+ * @returns
+ */
+ private populateRemoteModels = async (engineConfig: EngineConfig) => {
+ return this.getRemoteModels(engineConfig.engine)
+ .then((models: ModelList) => {
+ if (models?.data)
+ return Promise.all(
+ models.data.map((model) =>
+ this.addRemoteModel({
+ ...model,
+ engine: engineConfig.engine as InferenceEngine,
+ model: model.model ?? model.id,
+ }).catch(console.info)
+ )
+ ).then(() => {
+ events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
+ })
+ })
+ .catch(console.info)
+ }
}
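One behavioral point in the hunk above: the default models are now registered with `Promise.all` over a `map` instead of `forEach` with an async callback, so `OnModelsUpdate` fires only after every registration has settled. A sketch of the difference (names are illustrative):

    // forEach discards the promises its async callback creates, so code
    // after the loop may run before any registration has finished.
    const register = async (id: string) => { /* POST /v1/models/add */ }
    const ids = ['model-a', 'model-b']

    ids.forEach(async (id) => { await register(id) }) // fire-and-forget

    // map + Promise.all yields one promise that settles only after all
    // registrations complete, so the follow-up event sees every model.
    await Promise.all(ids.map((id) => register(id).catch(() => {})))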
diff --git a/extensions/engine-management-extension/src/node/cpuInfo.ts b/extensions/engine-management-extension/src/node/cpuInfo.ts
deleted file mode 100644
index 4366a995b..000000000
--- a/extensions/engine-management-extension/src/node/cpuInfo.ts
+++ /dev/null
@@ -1,27 +0,0 @@
-import { cpuInfo } from 'cpu-instructions'
-
-// Check the CPU info and determine the supported instruction set
-const info = cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512')
- ? 'avx512'
- : cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX2')
- ? 'avx2'
- : cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX')
- ? 'avx'
- : 'noavx'
-
-// Send the result and wait for confirmation before exiting
-new Promise<void>((resolve, reject) => {
- // @ts-ignore
- process.send(info, (error: Error | null) => {
- if (error) {
- reject(error)
- } else {
- resolve()
- }
- })
-})
- .then(() => process.exit(0))
- .catch((error) => {
- console.error('Failed to send info:', error)
- process.exit(1)
- })
diff --git a/extensions/engine-management-extension/src/node/index.test.ts b/extensions/engine-management-extension/src/node/index.test.ts
index c73feb9c6..aa2ac8be8 100644
--- a/extensions/engine-management-extension/src/node/index.test.ts
+++ b/extensions/engine-management-extension/src/node/index.test.ts
@@ -1,7 +1,6 @@
import { describe, expect, it } from '@jest/globals'
import engine from './index'
-import { GpuSetting } from '@janhq/core/node'
-import { cpuInfo } from 'cpu-instructions'
+import { GpuSetting } from '@janhq/core'
import { fork } from 'child_process'
let testSettings: GpuSetting = {
@@ -23,22 +22,12 @@ let testSettings: GpuSetting = {
}
const originalPlatform = process.platform
-jest.mock('cpu-instructions', () => ({
- cpuInfo: {
- cpuInfo: jest.fn(),
- },
-}))
-let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock
-mockCpuInfo.mockReturnValue([])
-jest.mock('@janhq/core/node', () => ({
+
+jest.mock('@janhq/core', () => ({
appResourcePath: () => '.',
log: jest.fn(),
}))
-jest.mock('child_process', () => ({
- fork: jest.fn(),
-}))
-const mockFork = fork as jest.Mock
describe('test executable cortex file', () => {
afterAll(function () {
@@ -48,14 +37,7 @@ describe('test executable cortex file', () => {
})
it('executes on MacOS', () => {
- const mockProcess = {
- on: jest.fn((event, callback) => {
- if (event === 'message') {
- callback('noavx')
- }
- }),
- send: jest.fn(),
- }
+
Object.defineProperty(process, 'platform', {
value: 'darwin',
})
@@ -63,7 +45,7 @@ describe('test executable cortex file', () => {
value: 'arm64',
})
- mockFork.mockReturnValue(mockProcess)
+
expect(engine.engineVariant(testSettings)).resolves.toEqual('mac-arm64')
})
@@ -83,7 +65,7 @@ describe('test executable cortex file', () => {
}),
send: jest.fn(),
}
- mockFork.mockReturnValue(mockProcess)
+
Object.defineProperty(process, 'arch', {
value: 'x64',
})
@@ -107,7 +89,6 @@ describe('test executable cortex file', () => {
}),
send: jest.fn(),
}
- mockFork.mockReturnValue(mockProcess)
expect(engine.engineVariant()).resolves.toEqual('windows-amd64-avx')
})
@@ -145,7 +126,6 @@ describe('test executable cortex file', () => {
}),
send: jest.fn(),
}
- mockFork.mockReturnValue(mockProcess)
expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-avx2-cuda-11-7'
@@ -176,26 +156,11 @@ describe('test executable cortex file', () => {
},
],
}
- mockFork.mockReturnValue({
- on: jest.fn((event, callback) => {
- if (event === 'message') {
- callback('noavx')
- }
- }),
- send: jest.fn(),
- })
expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-noavx-cuda-12-0'
)
- mockFork.mockReturnValue({
- on: jest.fn((event, callback) => {
- if (event === 'message') {
- callback('avx512')
- }
- }),
- send: jest.fn(),
- })
+
expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-avx2-cuda-12-0'
)
@@ -209,14 +174,6 @@ describe('test executable cortex file', () => {
...testSettings,
run_mode: 'cpu',
}
- mockFork.mockReturnValue({
- on: jest.fn((event, callback) => {
- if (event === 'message') {
- callback('noavx')
- }
- }),
- send: jest.fn(),
- })
expect(engine.engineVariant()).resolves.toEqual('linux-amd64-noavx')
})
@@ -245,16 +202,6 @@ describe('test executable cortex file', () => {
},
],
}
-
- mockFork.mockReturnValue({
- on: jest.fn((event, callback) => {
- if (event === 'message') {
- callback('avx512')
- }
- }),
- send: jest.fn(),
- })
-
expect(engine.engineVariant(settings)).resolves.toBe(
'linux-amd64-avx2-cuda-11-7'
)
@@ -284,14 +231,7 @@ describe('test executable cortex file', () => {
},
],
}
- mockFork.mockReturnValue({
- on: jest.fn((event, callback) => {
- if (event === 'message') {
- callback('avx2')
- }
- }),
- send: jest.fn(),
- })
+
expect(engine.engineVariant(settings)).resolves.toEqual(
'linux-amd64-avx2-cuda-12-0'
@@ -310,15 +250,6 @@ describe('test executable cortex file', () => {
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => {
- mockFork.mockReturnValue({
- on: jest.fn((event, callback) => {
- if (event === 'message') {
- callback(instruction)
- }
- }),
- send: jest.fn(),
- })
-
expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-${instruction}`
)
@@ -335,14 +266,7 @@ describe('test executable cortex file', () => {
}
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => {
- mockFork.mockReturnValue({
- on: jest.fn((event, callback) => {
- if (event === 'message') {
- callback(instruction)
- }
- }),
- send: jest.fn(),
- })
+
expect(engine.engineVariant(settings)).resolves.toEqual(
`windows-amd64-${instruction}`
)
@@ -376,14 +300,7 @@ describe('test executable cortex file', () => {
}
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => {
- mockFork.mockReturnValue({
- on: jest.fn((event, callback) => {
- if (event === 'message') {
- callback(instruction)
- }
- }),
- send: jest.fn(),
- })
+
expect(engine.engineVariant(settings)).resolves.toEqual(
`windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
)
@@ -417,14 +334,7 @@ describe('test executable cortex file', () => {
],
}
cpuInstructions.forEach((instruction) => {
- mockFork.mockReturnValue({
- on: jest.fn((event, callback) => {
- if (event === 'message') {
- callback(instruction)
- }
- }),
- send: jest.fn(),
- })
+
expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
)
@@ -459,14 +369,7 @@ describe('test executable cortex file', () => {
],
}
cpuInstructions.forEach((instruction) => {
- mockFork.mockReturnValue({
- on: jest.fn((event, callback) => {
- if (event === 'message') {
- callback(instruction)
- }
- }),
- send: jest.fn(),
- })
+
expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-vulkan`
)
diff --git a/extensions/engine-management-extension/src/node/index.ts b/extensions/engine-management-extension/src/node/index.ts
index 31ad90ed2..4c1daf998 100644
--- a/extensions/engine-management-extension/src/node/index.ts
+++ b/extensions/engine-management-extension/src/node/index.ts
@@ -2,111 +2,10 @@ import * as path from 'path'
import {
appResourcePath,
getJanDataFolderPath,
- GpuSetting,
log,
} from '@janhq/core/node'
-import { fork } from 'child_process'
import { mkdir, readdir, symlink } from 'fs/promises'
-/**
- * The GPU runMode that will be set - either 'vulkan', 'cuda', or empty for cpu.
- * @param settings
- * @returns
- */
-const gpuRunMode = (settings?: GpuSetting): string => {
- if (process.platform === 'darwin')
- // MacOS now has universal binaries
- return ''
-
- if (!settings) return ''
-
- return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda'
-}
-
-/**
- * The OS & architecture that the current process is running on.
- * @returns win, mac-x64, mac-arm64, or linux
- */
-const os = (): string => {
- return process.platform === 'win32'
- ? 'windows-amd64'
- : process.platform === 'darwin'
- ? process.arch === 'arm64'
- ? 'mac-arm64'
- : 'mac-amd64'
- : 'linux-amd64'
-}
-
-/**
- * The CUDA version that will be set - either '11-7' or '12-0'.
- * @param settings
- * @returns
- */
-const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
- const isUsingCuda =
- settings?.vulkan !== true &&
- settings?.run_mode === 'gpu' &&
- !os().includes('mac')
-
- if (!isUsingCuda) return undefined
- return settings?.cuda?.version === '11' ? '11-7' : '12-0'
-}
-
-/**
- * The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
- * @returns
- */
-const cpuInstructions = async (): Promise<string> => {
- if (process.platform === 'darwin') return ''
-
- const child = fork(path.join(__dirname, './cpuInfo.js')) // Path to the child process file
-
- return new Promise<string>((resolve, reject) => {
- child.on('message', (cpuInfo?: string) => {
- resolve(cpuInfo ?? 'noavx')
- child.kill() // Kill the child process after receiving the result
- })
-
- child.on('error', (err) => {
- resolve('noavx')
- child.kill()
- })
-
- child.on('exit', (code) => {
- if (code !== 0) {
- resolve('noavx')
- child.kill()
- }
- })
- })
-}
-
-/**
- * Find which variant to run based on the current platform.
- */
-const engineVariant = async (gpuSetting?: GpuSetting): Promise<string> => {
- const cpuInstruction = await cpuInstructions()
- log(`[CORTEX]: CPU instruction: ${cpuInstruction}`)
- let engineVariant = [
- os(),
- gpuSetting?.vulkan
- ? 'vulkan'
- : gpuRunMode(gpuSetting) !== 'cuda'
- ? // CPU mode - support all variants
- cpuInstruction
- : // GPU mode - packaged CUDA variants of avx2 and noavx
- cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
- ? 'avx2'
- : 'noavx',
- gpuRunMode(gpuSetting),
- cudaVersion(gpuSetting),
- ]
- .filter((e) => !!e)
- .join('-')
-
- log(`[CORTEX]: Engine variant: ${engineVariant}`)
- return engineVariant
-}
/**
* Create symlink to each variant for the default bundled version
@@ -148,6 +47,5 @@ const symlinkEngines = async () => {
}
export default {
- engineVariant,
symlinkEngines,
}
diff --git a/extensions/engine-management-extension/src/utils.ts b/extensions/engine-management-extension/src/utils.ts
new file mode 100644
index 000000000..30d482313
--- /dev/null
+++ b/extensions/engine-management-extension/src/utils.ts
@@ -0,0 +1,86 @@
+import { GpuSetting, log } from '@janhq/core'
+
+/**
+ * The GPU run mode that will be set - either 'cuda' or empty for CPU.
+ * @param settings
+ * @returns
+ */
+
+const gpuRunMode = (settings?: GpuSetting): string => {
+ return settings?.gpus?.some(
+ (gpu) =>
+ gpu.activated === true &&
+ gpu.additional_information &&
+ gpu.additional_information.driver_version
+ )
+ ? 'cuda'
+ : ''
+}
+
+/**
+ * The OS & architecture that the current process is running on.
+ * @returns windows-amd64, mac-arm64, mac-amd64, or linux-amd64
+ */
+const os = (settings?: GpuSetting): string => {
+ return PLATFORM === 'win32'
+ ? 'windows-amd64'
+ : PLATFORM === 'darwin'
+ ? settings?.cpu?.arch === 'arm64'
+ ? 'mac-arm64'
+ : 'mac-amd64'
+ : 'linux-amd64'
+}
+
+/**
+ * The CUDA version that will be set - either '11-7' or '12-0'.
+ * @param settings
+ * @returns
+ */
+const cudaVersion = (settings?: GpuSetting): '12-0' | '11-7' | undefined => {
+ const isUsingCuda =
+ settings?.vulkan !== true &&
+ settings?.gpus?.some((gpu) => gpu.activated === true) &&
+ !os().includes('mac')
+
+ if (!isUsingCuda) return undefined
+ return settings?.gpus?.some((gpu) => gpu.version?.includes('12'))
+ ? '12-0'
+ : '11-7'
+}
+
+/**
+ * Find which variant to run based on the current platform.
+ */
+export const engineVariant = async (
+ gpuSetting?: GpuSetting
+): Promise<string> => {
+ const platform = os(gpuSetting)
+
+ // There is no need to append the variant extension for mac
+ if (platform.startsWith('mac')) return platform
+
+ let engineVariant =
+ gpuSetting?.vulkan || gpuSetting?.gpus?.some((e) => !e.additional_information)
+ ? [platform, 'vulkan']
+ : [
+ platform,
+ gpuRunMode(gpuSetting) === 'cuda' &&
+ (gpuSetting?.cpu?.instructions?.includes('avx2') ||
+ gpuSetting?.cpu?.instructions?.includes('avx512'))
+ ? 'avx2'
+ : 'noavx',
+ gpuRunMode(gpuSetting),
+ cudaVersion(gpuSetting),
+ ].filter(Boolean) // Remove any falsy values
+
+ let engineVariantString = engineVariant.join('-')
+
+ log(`[CORTEX]: Engine variant: ${engineVariantString}`)
+ return engineVariantString
+}
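Two hypothetical inputs for `engineVariant`, assuming the GpuSetting shape the helpers above read (`cpu.arch`, `cpu.instructions`, `gpus[].activated`, `gpus[].version`, `gpus[].additional_information`):

    // On a darwin build (PLATFORM === 'darwin') the platform alone decides:
    await engineVariant({ cpu: { arch: 'arm64', instructions: [] }, gpus: [] } as GpuSetting)
    // -> 'mac-arm64'

    // On a win32 build with an activated NVIDIA GPU on a CUDA 12 driver:
    await engineVariant({
      vulkan: false,
      cpu: { arch: 'x64', instructions: ['avx2'] },
      gpus: [
        { activated: true, version: '12.0', additional_information: { driver_version: '535' } },
      ],
    } as GpuSetting)
    // -> 'windows-amd64-avx2-cuda-12-0'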
diff --git a/extensions/hardware-management-extension/jest.config.js b/extensions/hardware-management-extension/jest.config.js
new file mode 100644
index 000000000..8bb37208d
--- /dev/null
+++ b/extensions/hardware-management-extension/jest.config.js
@@ -0,0 +1,5 @@
+/** @type {import('ts-jest').JestConfigWithTsJest} */
+module.exports = {
+ preset: 'ts-jest',
+ testEnvironment: 'node',
+}
diff --git a/extensions/hardware-management-extension/package.json b/extensions/hardware-management-extension/package.json
new file mode 100644
index 000000000..ec98c7440
--- /dev/null
+++ b/extensions/hardware-management-extension/package.json
@@ -0,0 +1,48 @@
+{
+ "name": "@janhq/hardware-management-extension",
+ "productName": "Hardware Management",
+ "version": "1.0.0",
+ "description": "Manages Better Hardware settings.",
+ "main": "dist/index.js",
+ "node": "dist/node/index.cjs.js",
+ "author": "Jan ",
+ "license": "MIT",
+ "scripts": {
+ "test": "jest",
+ "build": "rolldown -c rolldown.config.mjs",
+ "codesign:darwin": "../../.github/scripts/auto-sign.sh",
+ "codesign:win32:linux": "echo 'No codesigning required'",
+ "codesign": "run-script-os",
+ "build:publish": "rimraf *.tgz --glob || true && yarn build && yarn codesign && npm pack && cpx *.tgz ../../pre-install"
+ },
+ "exports": {
+ ".": "./dist/index.js",
+ "./main": "./dist/module.js"
+ },
+ "devDependencies": {
+ "cpx": "^1.5.0",
+ "rimraf": "^3.0.2",
+ "rolldown": "^1.0.0-beta.1",
+ "run-script-os": "^1.1.6",
+ "ts-loader": "^9.5.0",
+ "typescript": "^5.3.3"
+ },
+ "dependencies": {
+ "@janhq/core": "../../core/package.tgz",
+ "cpu-instructions": "^0.0.13",
+ "ky": "^1.7.2",
+ "p-queue": "^8.0.1"
+ },
+ "bundledDependencies": [
+ "cpu-instructions",
+ "@janhq/core"
+ ],
+ "hardwares": {
+ "node": ">=18.0.0"
+ },
+ "files": [
+ "dist/*",
+ "package.json",
+ "README.md"
+ ]
+}
diff --git a/extensions/hardware-management-extension/rolldown.config.mjs b/extensions/hardware-management-extension/rolldown.config.mjs
new file mode 100644
index 000000000..7982ca555
--- /dev/null
+++ b/extensions/hardware-management-extension/rolldown.config.mjs
@@ -0,0 +1,17 @@
+import { defineConfig } from 'rolldown'
+import pkgJson from './package.json' with { type: 'json' }
+
+export default defineConfig([
+ {
+ input: 'src/index.ts',
+ output: {
+ format: 'esm',
+ file: 'dist/index.js',
+ },
+ define: {
+ NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`),
+ API_URL: JSON.stringify('http://127.0.0.1:39291'),
+ SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
+ },
+ },
+])
diff --git a/extensions/hardware-management-extension/src/@types/global.d.ts b/extensions/hardware-management-extension/src/@types/global.d.ts
new file mode 100644
index 000000000..6639b9cbb
--- /dev/null
+++ b/extensions/hardware-management-extension/src/@types/global.d.ts
@@ -0,0 +1,12 @@
+declare const API_URL: string
+declare const SOCKET_URL: string
+declare const NODE: string
+
+interface Core {
+ api: APIFunctions
+ events: EventEmitter
+}
+interface Window {
+ core?: Core | undefined
+ electronAPI?: any | undefined
+}
diff --git a/extensions/hardware-management-extension/src/index.ts b/extensions/hardware-management-extension/src/index.ts
new file mode 100644
index 000000000..c2edc6159
--- /dev/null
+++ b/extensions/hardware-management-extension/src/index.ts
@@ -0,0 +1,67 @@
+import {
+ HardwareManagementExtension,
+ HardwareInformation,
+} from '@janhq/core'
+import ky from 'ky'
+import PQueue from 'p-queue'
+
+/**
+ * JSONHardwareManagementExtension is a HardwareManagementExtension implementation that provides
+ * functionality for managing hardware.
+ */
+export default class JSONHardwareManagementExtension extends HardwareManagementExtension {
+ queue = new PQueue({ concurrency: 1 })
+
+ /**
+ * Called when the extension is loaded.
+ */
+ async onLoad() {
+ // Run Healthcheck
+ this.queue.add(() => this.healthz())
+ }
+
+ /**
+ * Called when the extension is unloaded.
+ */
+ onUnload() {}
+
+ /**
+ * Do health check on cortex.cpp
+ * @returns
+ */
+ async healthz(): Promise<void> {
+ return ky
+ .get(`${API_URL}/healthz`, {
+ retry: { limit: 20, delay: () => 500, methods: ['get'] },
+ })
+ .then(() => {})
+ }
+
+ /**
+ * @returns A Promise that resolves to the system's hardware information.
+ */
+ async getHardware(): Promise<HardwareInformation> {
+ return this.queue.add(() =>
+ ky
+ .get(`${API_URL}/v1/hardware`)
+ .json()
+ .then((e) => e)
+ ) as Promise<HardwareInformation>
+ }
+
+ /**
+ * @returns A Promise that resolves to the GPU activation result.
+ */
+ async setAvtiveGpu(data: { gpus: number[] }): Promise<{
+ message: string
+ activated_gpus: number[]
+ }> {
+ return this.queue.add(() =>
+ ky.post(`${API_URL}/v1/hardware/activate`, { json: data }).then((e) => e)
+ ) as Promise<{
+ message: string
+ activated_gpus: number[]
+ }>
+ }
+}
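A hypothetical consumer of the new extension (the instance name is invented; the method names are as declared above):

    // Query cortex.cpp's hardware inventory through the extension.
    const hw = await hardwareExtension.getHardware()
    console.log(hw) // CPU, RAM and GPU details from GET /v1/hardware

    // Activate GPUs by index; cortex.cpp answers with the resulting set.
    const result = await hardwareExtension.setAvtiveGpu({ gpus: [0] })
    console.log(result.activated_gpus) // e.g. [0]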
diff --git a/extensions/monitoring-extension/tsconfig.json b/extensions/hardware-management-extension/tsconfig.json
similarity index 65%
rename from extensions/monitoring-extension/tsconfig.json
rename to extensions/hardware-management-extension/tsconfig.json
index 2477d58ce..72e1e1895 100644
--- a/extensions/monitoring-extension/tsconfig.json
+++ b/extensions/hardware-management-extension/tsconfig.json
@@ -8,7 +8,9 @@
"forceConsistentCasingInFileNames": true,
"strict": false,
"skipLibCheck": true,
- "rootDir": "./src"
+ "rootDir": "./src",
+ "resolveJsonModule": true
},
- "include": ["./src"]
+ "include": ["./src"],
+ "exclude": ["src/**/*.test.ts", "rolldown.config.mjs"]
}
diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt
index 53bd3ae76..7ee7020b3 100644
--- a/extensions/inference-cortex-extension/bin/version.txt
+++ b/extensions/inference-cortex-extension/bin/version.txt
@@ -1 +1 @@
-1.0.9-rc7
+1.0.10
diff --git a/extensions/inference-cortex-extension/package.json b/extensions/inference-cortex-extension/package.json
index f191f3071..a4558dc8f 100644
--- a/extensions/inference-cortex-extension/package.json
+++ b/extensions/inference-cortex-extension/package.json
@@ -1,7 +1,7 @@
{
"name": "@janhq/inference-cortex-extension",
"productName": "Cortex Inference Engine",
- "version": "1.0.24",
+ "version": "1.0.25",
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
diff --git a/extensions/inference-cortex-extension/resources/default_settings.json b/extensions/inference-cortex-extension/resources/default_settings.json
index a3a93f305..945f32729 100644
--- a/extensions/inference-cortex-extension/resources/default_settings.json
+++ b/extensions/inference-cortex-extension/resources/default_settings.json
@@ -76,7 +76,7 @@
},
{
"key": "use_mmap",
- "title": "MMAP",
+ "title": "mmap",
"description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.",
"controllerType": "checkbox",
"controllerProps": {
diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-70b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-70b/model.json
new file mode 100644
index 000000000..7f98b07a1
--- /dev/null
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-70b/model.json
@@ -0,0 +1,35 @@
+{
+ "sources": [
+ {
+ "filename": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf",
+ "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf"
+ }
+ ],
+ "id": "deepseek-r1-distill-llama-70b",
+ "object": "model",
+ "name": "DeepSeek R1 Distill Llama 70B Q4",
+ "version": "1.0",
+ "description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
+ "format": "gguf",
+ "settings": {
+ "ctx_len": 131072,
+ "prompt_template": "<|User|> {prompt} <|Assistant|>",
+ "llama_model_path": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf",
+ "ngl": 81
+ },
+ "parameters": {
+ "temperature": 0.6,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 131072,
+ "stop": [],
+ "frequency_penalty": 0,
+ "presence_penalty": 0
+ },
+ "metadata": {
+ "author": "DeepSeek",
+ "tags": ["70B", "Featured"],
+ "size": 42500000000
+ },
+ "engine": "llama-cpp"
+}
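For reference, a sketch of how a launcher might consume one of these manifests; the interface below captures only the fields visible in the file above, and the path is illustrative:

    import { readFile } from 'fs/promises'

    interface ModelManifest {
      id: string
      sources: { filename: string; url: string }[]
      settings: { ctx_len: number; ngl: number; llama_model_path: string }
      parameters: Record<string, unknown>
    }

    const manifest: ModelManifest = JSON.parse(
      await readFile('model.json', 'utf8')
    )
    // ngl is the number of layers to offload to the GPU; ctx_len caps the
    // context window (131072 tokens = 128k here).
    console.log(manifest.sources[0].url, manifest.settings.ngl)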
diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-8b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-8b/model.json
new file mode 100644
index 000000000..a3a075888
--- /dev/null
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-8b/model.json
@@ -0,0 +1,35 @@
+{
+ "sources": [
+ {
+ "filename": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf",
+ "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf"
+ }
+ ],
+ "id": "deepseek-r1-distill-llama-8b",
+ "object": "model",
+ "name": "DeepSeek R1 Distill Llama 8B Q5",
+ "version": "1.0",
+ "description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
+ "format": "gguf",
+ "settings": {
+ "ctx_len": 131072,
+ "prompt_template": "<|User|> {prompt} <|Assistant|>",
+ "llama_model_path": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf",
+ "ngl": 33
+ },
+ "parameters": {
+ "temperature": 0.6,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 131072,
+ "stop": [],
+ "frequency_penalty": 0,
+ "presence_penalty": 0
+ },
+ "metadata": {
+ "author": "DeepSeek",
+ "tags": ["8B", "Featured"],
+ "size": 5730000000
+ },
+ "engine": "llama-cpp"
+}
diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-1.5b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-1.5b/model.json
new file mode 100644
index 000000000..74b3dfc54
--- /dev/null
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-1.5b/model.json
@@ -0,0 +1,35 @@
+{
+ "sources": [
+ {
+ "filename": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf",
+ "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf"
+ }
+ ],
+ "id": "deepseek-r1-distill-qwen-1.5b",
+ "object": "model",
+ "name": "DeepSeek R1 Distill Qwen 1.5B Q5",
+ "version": "1.0",
+ "description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
+ "format": "gguf",
+ "settings": {
+ "ctx_len": 131072,
+ "prompt_template": "<|User|> {prompt} <|Assistant|>",
+ "llama_model_path": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf",
+ "ngl": 29
+ },
+ "parameters": {
+ "temperature": 0.6,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 131072,
+ "stop": [],
+ "frequency_penalty": 0,
+ "presence_penalty": 0
+ },
+ "metadata": {
+ "author": "DeepSeek",
+ "tags": ["1.5B", "Featured"],
+ "size": 1290000000
+ },
+ "engine": "llama-cpp"
+}
diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-14b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-14b/model.json
new file mode 100644
index 000000000..594ba6e41
--- /dev/null
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-14b/model.json
@@ -0,0 +1,35 @@
+{
+ "sources": [
+ {
+ "filename": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf",
+ "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf"
+ }
+ ],
+ "id": "deepseek-r1-distill-qwen-14b",
+ "object": "model",
+ "name": "DeepSeek R1 Distill Qwen 14B Q4",
+ "version": "1.0",
+ "description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
+ "format": "gguf",
+ "settings": {
+ "ctx_len": 131072,
+ "prompt_template": "<|User|> {prompt} <|Assistant|>",
+ "llama_model_path": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf",
+ "ngl": 49
+ },
+ "parameters": {
+ "temperature": 0.6,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 131072,
+ "stop": [],
+ "frequency_penalty": 0,
+ "presence_penalty": 0
+ },
+ "metadata": {
+ "author": "DeepSeek",
+ "tags": ["14B", "Featured"],
+ "size": 8990000000
+ },
+ "engine": "llama-cpp"
+}
diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-32b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-32b/model.json
new file mode 100644
index 000000000..6d7f5accf
--- /dev/null
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-32b/model.json
@@ -0,0 +1,35 @@
+{
+ "sources": [
+ {
+ "filename": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf",
+ "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf"
+ }
+ ],
+ "id": "deepseek-r1-distill-qwen-32b",
+ "object": "model",
+ "name": "DeepSeek R1 Distill Qwen 32B Q4",
+ "version": "1.0",
+ "description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
+ "format": "gguf",
+ "settings": {
+ "ctx_len": 131072,
+ "prompt_template": "<|User|> {prompt} <|Assistant|>",
+ "llama_model_path": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf",
+ "ngl": 65
+ },
+ "parameters": {
+ "temperature": 0.6,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 131072,
+ "stop": [],
+ "frequency_penalty": 0,
+ "presence_penalty": 0
+ },
+ "metadata": {
+ "author": "DeepSeek",
+ "tags": ["32B", "Featured"],
+ "size": 19900000000
+ },
+ "engine": "llama-cpp"
+}
diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-7b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-7b/model.json
new file mode 100644
index 000000000..eae53cf0e
--- /dev/null
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-7b/model.json
@@ -0,0 +1,35 @@
+{
+ "sources": [
+ {
+ "filename": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf",
+ "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf"
+ }
+ ],
+ "id": "deepseek-r1-distill-qwen-7b",
+ "object": "model",
+ "name": "DeepSeek R1 Distill Qwen 7B Q5",
+ "version": "1.0",
+ "description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
+ "format": "gguf",
+ "settings": {
+ "ctx_len": 131072,
+ "prompt_template": "<|User|> {prompt} <|Assistant|>",
+ "llama_model_path": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf",
+ "ngl": 29
+ },
+ "parameters": {
+ "temperature": 0.6,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 131072,
+ "stop": [],
+ "frequency_penalty": 0,
+ "presence_penalty": 0
+ },
+ "metadata": {
+ "author": "DeepSeek",
+ "tags": ["7B", "Featured"],
+ "size": 5440000000
+ },
+ "engine": "llama-cpp"
+}
diff --git a/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json
index ec9a0284b..1aeb80450 100644
--- a/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json
@@ -22,19 +22,13 @@
"top_p": 0.95,
"stream": true,
"max_tokens": 8192,
- "stop": [
- "<|end_of_text|>",
- "<|eot_id|>",
- "<|eom_id|>"
- ],
+ "stop": ["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MetaAI",
- "tags": [
- "8B", "Featured"
- ],
+ "tags": ["8B", "Featured"],
"size": 4920000000
},
"engine": "llama-cpp"
diff --git a/extensions/inference-cortex-extension/rolldown.config.mjs b/extensions/inference-cortex-extension/rolldown.config.mjs
index 278664d3d..aebd8ac38 100644
--- a/extensions/inference-cortex-extension/rolldown.config.mjs
+++ b/extensions/inference-cortex-extension/rolldown.config.mjs
@@ -49,6 +49,13 @@ import qwen2514bJson from './resources/models/qwen2.5-14b-instruct/model.json' w
import qwen2532bJson from './resources/models/qwen2.5-32b-instruct/model.json' with { type: 'json' }
import qwen2572bJson from './resources/models/qwen2.5-72b-instruct/model.json' with { type: 'json' }
+import deepseekR1DistillQwen_1_5b from './resources/models/deepseek-r1-distill-qwen-1.5b/model.json' with { type: 'json' }
+import deepseekR1DistillQwen_7b from './resources/models/deepseek-r1-distill-qwen-7b/model.json' with { type: 'json' }
+import deepseekR1DistillQwen_14b from './resources/models/deepseek-r1-distill-qwen-14b/model.json' with { type: 'json' }
+import deepseekR1DistillQwen_32b from './resources/models/deepseek-r1-distill-qwen-32b/model.json' with { type: 'json' }
+import deepseekR1DistillLlama_8b from './resources/models/deepseek-r1-distill-llama-8b/model.json' with { type: 'json' }
+import deepseekR1DistillLlama_70b from './resources/models/deepseek-r1-distill-llama-70b/model.json' with { type: 'json' }
+
export default defineConfig([
{
input: 'src/index.ts',
@@ -106,6 +113,12 @@ export default defineConfig([
qwen2514bJson,
qwen2532bJson,
qwen2572bJson,
+ deepseekR1DistillQwen_1_5b,
+ deepseekR1DistillQwen_7b,
+ deepseekR1DistillQwen_14b,
+ deepseekR1DistillQwen_32b,
+ deepseekR1DistillLlama_8b,
+ deepseekR1DistillLlama_70b,
]),
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
SETTINGS: JSON.stringify(defaultSettingJson),
diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index 84cc49b94..05efaf735 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -112,8 +112,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number
// Run the process watchdog
- const systemInfo = await systemInformation()
- this.queue.add(() => executeOnMain(NODE, 'run', systemInfo))
+ // const systemInfo = await systemInformation()
+ this.queue.add(() => executeOnMain(NODE, 'run'))
this.queue.add(() => this.healthz())
this.subscribeToEvents()
diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts
index 420c84b6e..5222bb156 100644
--- a/extensions/inference-cortex-extension/src/node/index.ts
+++ b/extensions/inference-cortex-extension/src/node/index.ts
@@ -16,15 +16,20 @@ let watchdog: ProcessWatchdog | undefined = undefined
 * Spawns the cortex subprocess.
 * @returns A promise that resolves when the cortex subprocess is started.
*/
-function run(systemInfo?: SystemInformation): Promise<void> {
+function run(): Promise<void> {
log(`[CORTEX]:: Spawning cortex subprocess...`)
return new Promise(async (resolve, reject) => {
- let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? ''
- let binaryName = `cortex-server${process.platform === 'win32' ? '.exe' : ''}`
+ // let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? ''
+ let binaryName = `cortex-server${
+ process.platform === 'win32' ? '.exe' : ''
+ }`
const binPath = path.join(__dirname, '..', 'bin')
const executablePath = path.join(binPath, binaryName)
+
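+  // Make the bundled engine libraries discoverable before spawning cortex-server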
+ addEnvPaths(binPath)
+
const sharedPath = path.join(appResourcePath(), 'shared')
// Execute the binary
log(`[CORTEX]:: Spawn cortex at path: ${executablePath}`)
@@ -44,15 +49,17 @@ function run(systemInfo?: SystemInformation): Promise<void> {
`${path.join(dataFolderPath, '.janrc')}`,
'--data_folder_path',
dataFolderPath,
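+        // Run the server with INFO-level logging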
+ '--loglevel',
+ 'INFO',
],
{
env: {
...process.env,
- CUDA_VISIBLE_DEVICES: gpuVisibleDevices,
- // Vulkan - Support 1 device at a time for now
- ...(gpuVisibleDevices?.length > 0 && {
- GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices,
- }),
+ // CUDA_VISIBLE_DEVICES: gpuVisibleDevices,
+ // // Vulkan - Support 1 device at a time for now
+ // ...(gpuVisibleDevices?.length > 0 && {
+ // GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices,
+ // }),
},
cwd: sharedPath,
}
@@ -71,6 +78,22 @@ function dispose() {
watchdog?.terminate()
}
+/**
+ * Adds the given directory to the library search path so the cortex
+ * subprocess can resolve its bundled shared libraries.
+ * @param dest The directory containing the engine libraries.
+ */
+function addEnvPaths(dest: string) {
+ // Add engine path to the PATH and LD_LIBRARY_PATH
+ if (process.platform === 'win32') {
+ process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
+ } else {
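+    // Non-Windows platforms resolve shared libraries via LD_LIBRARY_PATH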
+ process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
+ path.delimiter,
+ dest
+ )
+ }
+}
+
/**
* Cortex process info
*/
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index 719671cfd..105f7ad91 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -15,8 +15,6 @@ import {
} from '@janhq/core'
import { CortexAPI } from './cortex'
import { scanModelsFolder } from './legacy/model-json'
-import { downloadModel } from './legacy/download'
-import { systemInformation } from '@janhq/core'
import { deleteModelFiles } from './legacy/delete'
export enum Settings {
@@ -71,18 +69,6 @@ export default class JanModelExtension extends ModelExtension {
* @returns A Promise that resolves when the model is downloaded.
*/
   async pullModel(model: string, id?: string, name?: string): Promise<void> {
- if (id) {
- const model: Model = ModelManager.instance().get(id)
- // Clip vision model - should not be handled by cortex.cpp
- // TensorRT model - should not be handled by cortex.cpp
- if (
- model &&
- (model.engine === InferenceEngine.nitro_tensorrt_llm ||
- model.settings.vision_model)
- ) {
- return downloadModel(model, (await systemInformation()).gpuSetting)
- }
- }
/**
* Sending POST to /models/pull/{id} endpoint to pull the model
*/
diff --git a/extensions/model-extension/src/legacy/download.ts b/extensions/model-extension/src/legacy/download.ts
index d4d6c62d9..570d0cd13 100644
--- a/extensions/model-extension/src/legacy/download.ts
+++ b/extensions/model-extension/src/legacy/download.ts
@@ -2,15 +2,12 @@ import {
downloadFile,
DownloadRequest,
fs,
- GpuSetting,
- InferenceEngine,
joinPath,
Model,
} from '@janhq/core'
export const downloadModel = async (
model: Model,
- gpuSettings?: GpuSetting,
network?: { ignoreSSL?: boolean; proxy?: string }
 ): Promise<void> => {
const homedir = 'file://models'
@@ -27,41 +24,6 @@ export const downloadModel = async (
JSON.stringify(model, null, 2)
)
- if (model.engine === InferenceEngine.nitro_tensorrt_llm) {
- if (!gpuSettings || gpuSettings.gpus.length === 0) {
- console.error('No GPU found. Please check your GPU setting.')
- return
- }
- const firstGpu = gpuSettings.gpus[0]
- if (!firstGpu.name.toLowerCase().includes('nvidia')) {
- console.error('No Nvidia GPU found. Please check your GPU setting.')
- return
- }
- const gpuArch = firstGpu.arch
- if (gpuArch === undefined) {
- console.error('No GPU architecture found. Please check your GPU setting.')
- return
- }
-
- if (!supportedGpuArch.includes(gpuArch)) {
- console.debug(
- `Your GPU: ${JSON.stringify(firstGpu)} is not supported. Only 30xx, 40xx series are supported.`
- )
- return
- }
-
- const os = 'windows' // TODO: remove this hard coded value
-
- const newSources = model.sources.map((source) => {
- const newSource = { ...source }
- newSource.url = newSource.url
-        .replace(/<os>/g, os)
-        .replace(/<gpu_arch>/g, gpuArch)
- return newSource
- })
- model.sources = newSources
- }
-
console.debug(`Download sources: ${JSON.stringify(model.sources)}`)
if (model.sources.length > 1) {
diff --git a/extensions/monitoring-extension/README.md b/extensions/monitoring-extension/README.md
deleted file mode 100644
index f9690da09..000000000
--- a/extensions/monitoring-extension/README.md
+++ /dev/null
@@ -1,75 +0,0 @@
-# Create a Jan Extension using TypeScript
-
-Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
-
-## Create Your Own Extension
-
-To create your own extension, you can use this repository as a template! Just follow the below instructions:
-
-1. Click the Use this template button at the top of the repository
-2. Select Create a new repository
-3. Select an owner and name for your new repository
-4. Click Create repository
-5. Clone your new repository
-
-## Initial Setup
-
-After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
-
-> [!NOTE]
->
-> You'll need to have a reasonably modern version of
-> [Node.js](https://nodejs.org) handy. If you are using a version manager like
-> [`nodenv`](https://github.com/nodenv/nodenv) or
-> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
-> root of your repository to install the version specified in
-> [`package.json`](./package.json). Otherwise, 20.x or later should work!
-
-1. :hammer_and_wrench: Install the dependencies
-
- ```bash
- npm install
- ```
-
-1. :building_construction: Package the TypeScript for distribution
-
- ```bash
- npm run bundle
- ```
-
-1. :white_check_mark: Check your artifact
-
- There will be a tgz file in your extension directory now
-
-## Update the Extension Metadata
-
-The [`package.json`](package.json) file defines metadata about your extension, such as
-extension name, main entry, description and version.
-
-When you copy this repository, update `package.json` with the name and description of your extension.
-
-## Update the Extension Code
-
-The [`src/`](./src/) directory is the heart of your extension! This contains the
-source code that will be run when your extension functions are invoked. You can replace the
-contents of this directory with your own code.
-
-There are a few things to keep in mind when writing your extension code:
-
-- Most Jan Extension functions are processed asynchronously.
- In `index.ts`, you will see that the extension function will return a `Promise`.
-
- ```typescript
- import { events, MessageEvent, MessageRequest } from '@janhq/core'
-
-  function onStart(): Promise<void> {
- return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
- this.inference(data)
- )
- }
- ```
-
- For more information about the Jan Extension Core module, see the
- [documentation](https://github.com/janhq/jan/blob/main/core/README.md).
-
-So, what are you waiting for? Go ahead and start customizing your extension!
diff --git a/extensions/monitoring-extension/bin/.gitkeep b/extensions/monitoring-extension/bin/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
diff --git a/extensions/monitoring-extension/download.bat b/extensions/monitoring-extension/download.bat
deleted file mode 100644
index 14e0aadd9..000000000
--- a/extensions/monitoring-extension/download.bat
+++ /dev/null
@@ -1,2 +0,0 @@
-@echo off
-.\node_modules\.bin\download https://catalog.jan.ai/vulkaninfoSDK.exe -o ./bin
\ No newline at end of file
diff --git a/extensions/monitoring-extension/package.json b/extensions/monitoring-extension/package.json
deleted file mode 100644
index 2f827b41b..000000000
--- a/extensions/monitoring-extension/package.json
+++ /dev/null
@@ -1,49 +0,0 @@
-{
- "name": "@janhq/monitoring-extension",
- "productName": "System Monitoring",
- "version": "1.0.10",
- "description": "Provides system health and OS level data.",
- "main": "dist/index.js",
- "node": "dist/node/index.cjs.js",
- "author": "Jan ",
- "license": "AGPL-3.0",
- "scripts": {
- "build": "rolldown -c rolldown.config.mjs && yarn download-artifacts",
- "download-artifacts": "run-script-os && cpx \"bin/**\" \"dist/bin\"",
- "download-artifacts:darwin": "echo 'No artifacts to download for darwin'",
- "download-artifacts:win32": "download.bat",
- "download-artifacts:linux": "download https://catalog.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo",
- "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
- },
- "exports": {
- ".": "./dist/index.js",
- "./main": "./dist/node/index.cjs.js"
- },
- "devDependencies": {
- "@types/node": "^20.11.4",
- "@types/node-os-utils": "^1.3.4",
- "cpx": "^1.5.0",
- "download-cli": "^1.1.1",
- "rimraf": "^3.0.2",
- "rolldown": "1.0.0-beta.1",
- "run-script-os": "^1.1.6",
- "typescript": "^5.3.3"
- },
- "dependencies": {
- "@janhq/core": "../../core/package.tgz",
- "node-os-utils": "^1.3.7"
- },
- "files": [
- "dist/*",
- "package.json",
- "README.md"
- ],
- "bundleDependencies": [
- "node-os-utils",
- "@janhq/core"
- ],
- "installConfig": {
- "hoistingLimits": "workspaces"
- },
- "packageManager": "yarn@4.5.3"
-}
diff --git a/extensions/monitoring-extension/resources/settings.json b/extensions/monitoring-extension/resources/settings.json
deleted file mode 100644
index 40b0b97f9..000000000
--- a/extensions/monitoring-extension/resources/settings.json
+++ /dev/null
@@ -1,22 +0,0 @@
-[
- {
- "key": "log-enabled",
- "title": "Enable App Logs",
- "description": "Saves app logs locally on your computer. This enables you to send us crash reports.",
- "controllerType": "checkbox",
- "controllerProps": {
- "value": true
- }
- },
- {
- "key": "log-cleaning-interval",
- "title": "Log Cleaning Interval",
- "description": "Automatically delete local logs after a certain time interval (in milliseconds).",
- "controllerType": "input",
- "controllerProps": {
- "value": "120000",
- "placeholder": "Interval in milliseconds. E.g. 120000",
- "textAlign": "right"
- }
- }
-]
\ No newline at end of file
diff --git a/extensions/monitoring-extension/rolldown.config.mjs b/extensions/monitoring-extension/rolldown.config.mjs
deleted file mode 100644
index 3533e052b..000000000
--- a/extensions/monitoring-extension/rolldown.config.mjs
+++ /dev/null
@@ -1,32 +0,0 @@
-import { defineConfig } from 'rolldown'
-import packageJson from './package.json' with { type: 'json' }
-import settingJson from './resources/settings.json' with { type: 'json' }
-
-export default defineConfig([
- {
- input: 'src/index.ts',
- output: {
- format: 'esm',
- file: 'dist/index.js',
- },
- platform: 'browser',
- define: {
- NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
- SETTINGS: JSON.stringify(settingJson),
- },
- },
- {
- input: 'src/node/index.ts',
- external: ['@janhq/core/node'],
- output: {
- format: 'cjs',
- file: 'dist/node/index.cjs.js',
- sourcemap: false,
- inlineDynamicImports: true,
- },
- resolve: {
- extensions: ['.js', '.ts', '.json'],
- },
- platform: 'node',
- },
-])
diff --git a/extensions/monitoring-extension/src/@types/global.d.ts b/extensions/monitoring-extension/src/@types/global.d.ts
deleted file mode 100644
index 7536fabd8..000000000
--- a/extensions/monitoring-extension/src/@types/global.d.ts
+++ /dev/null
@@ -1,19 +0,0 @@
-declare const NODE: string
-declare const SETTINGS: SettingComponentProps[]
-
-type CpuGpuInfo = {
- cpu: {
- usage: number
- }
- gpu: GpuInfo[]
-}
-
-type GpuInfo = {
- id: string
- name: string
- temperature: string
- utilization: string
- memoryTotal: string
- memoryFree: string
- memoryUtilization: string
-}
diff --git a/extensions/monitoring-extension/src/index.ts b/extensions/monitoring-extension/src/index.ts
deleted file mode 100644
index 5616c70a8..000000000
--- a/extensions/monitoring-extension/src/index.ts
+++ /dev/null
@@ -1,90 +0,0 @@
-import {
- AppConfigurationEventName,
- GpuSetting,
- MonitoringExtension,
- OperatingSystemInfo,
- events,
- executeOnMain,
-} from '@janhq/core'
-
-enum Settings {
- logEnabled = 'log-enabled',
- logCleaningInterval = 'log-cleaning-interval',
-}
-/**
- * JanMonitoringExtension is an extension that provides system monitoring functionality.
- * It implements the MonitoringExtension interface from the @janhq/core package.
- */
-export default class JanMonitoringExtension extends MonitoringExtension {
- /**
- * Called when the extension is loaded.
- */
- async onLoad() {
- // Register extension settings
- this.registerSettings(SETTINGS)
-
- const logEnabled = await this.getSetting(Settings.logEnabled, true)
- const logCleaningInterval = parseInt(
- await this.getSetting(Settings.logCleaningInterval, '120000')
- )
- // Register File Logger provided by this extension
- await executeOnMain(NODE, 'registerLogger', {
- logEnabled,
- logCleaningInterval: isNaN(logCleaningInterval)
- ? 120000
- : logCleaningInterval,
- })
-
- // Attempt to fetch nvidia info
- await executeOnMain(NODE, 'updateNvidiaInfo')
- events.emit(AppConfigurationEventName.OnConfigurationUpdate, {})
- }
-
- onSettingUpdate(key: string, value: T): void {
- if (key === Settings.logEnabled) {
- executeOnMain(NODE, 'updateLogger', { logEnabled: value })
- } else if (key === Settings.logCleaningInterval) {
- executeOnMain(NODE, 'updateLogger', { logCleaningInterval: value })
- }
- }
-
- /**
- * Called when the extension is unloaded.
- */
- onUnload(): void {
- // Register File Logger provided by this extension
- executeOnMain(NODE, 'unregisterLogger')
- }
-
- /**
- * Returns the GPU configuration.
- * @returns A Promise that resolves to an object containing the GPU configuration.
- */
-  async getGpuSetting(): Promise<GpuSetting | undefined> {
- return executeOnMain(NODE, 'getGpuConfig')
- }
-
- /**
- * Returns information about the system resources.
- * @returns A Promise that resolves to an object containing information about the system resources.
- */
-  getResourcesInfo(): Promise<ResourceInfo> {
- return executeOnMain(NODE, 'getResourcesInfo')
- }
-
- /**
- * Returns information about the current system load.
- * @returns A Promise that resolves to an object containing information about the current system load.
- */
-  getCurrentLoad(): Promise<CpuGpuInfo> {
- return executeOnMain(NODE, 'getCurrentLoad')
- }
-
- /**
- * Returns information about the OS
- * @returns
- */
-  getOsInfo(): Promise<OperatingSystemInfo> {
- return executeOnMain(NODE, 'getOsInfo')
- }
-}
diff --git a/extensions/monitoring-extension/src/node/index.ts b/extensions/monitoring-extension/src/node/index.ts
deleted file mode 100644
index e32f85082..000000000
--- a/extensions/monitoring-extension/src/node/index.ts
+++ /dev/null
@@ -1,389 +0,0 @@
-import {
- GpuSetting,
- GpuSettingInfo,
- LoggerManager,
- OperatingSystemInfo,
- ResourceInfo,
- SupportedPlatforms,
- getJanDataFolderPath,
- log,
-} from '@janhq/core/node'
-import { mem, cpu } from 'node-os-utils'
-import { exec } from 'child_process'
-import { writeFileSync, existsSync, readFileSync, mkdirSync } from 'fs'
-import path from 'path'
-import os from 'os'
-import { FileLogger } from './logger'
-
-/**
- * Path to the settings directory
- **/
-export const SETTINGS_DIR = path.join(getJanDataFolderPath(), 'settings')
-/**
- * Path to the settings file
- **/
-export const GPU_INFO_FILE = path.join(SETTINGS_DIR, 'settings.json')
-
-/**
- * Default GPU settings
- * TODO: This needs to be refactored to support multiple accelerators
- **/
-const DEFAULT_SETTINGS: GpuSetting = {
- notify: true,
- run_mode: 'cpu',
- nvidia_driver: {
- exist: false,
- version: '',
- },
- cuda: {
- exist: false,
- version: '',
- },
- gpus: [],
- gpu_highest_vram: '',
- gpus_in_use: [],
- is_initial: true,
- // TODO: This needs to be set based on user toggle in settings
- vulkan: false,
-}
-
-export const getGpuConfig = async (): Promise<GpuSetting | undefined> => {
- if (process.platform === 'darwin') return undefined
- if (existsSync(GPU_INFO_FILE))
- return JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
- return DEFAULT_SETTINGS
-}
-
-export const getResourcesInfo = async (): Promise<ResourceInfo> => {
- const ramUsedInfo = await mem.used()
- const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024
- const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024
-
- const resourceInfo: ResourceInfo = {
- mem: {
- totalMemory,
- usedMemory,
- },
- }
-
- return resourceInfo
-}
-
-export const getCurrentLoad = () =>
-  new Promise<CpuGpuInfo>(async (resolve, reject) => {
- const cpuPercentage = await cpu.usage()
- let data = {
- run_mode: 'cpu',
- gpus_in_use: [],
- }
-
- if (process.platform !== 'darwin') {
- data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
- }
-
- if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
- const gpuIds = data.gpus_in_use.join(',')
- if (gpuIds !== '' && data['vulkan'] !== true) {
- exec(
- `nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
- (error, stdout, _) => {
- if (error) {
- console.error(`exec error: ${error}`)
- throw new Error(error.message)
- }
- const gpuInfo: GpuInfo[] = stdout
- .trim()
- .split('\n')
- .map((line) => {
- const [
- id,
- name,
- temperature,
- utilization,
- memoryTotal,
- memoryFree,
- memoryUtilization,
- ] = line.split(', ').map((item) => item.replace(/\r/g, ''))
- return {
- id,
- name,
- temperature,
- utilization,
- memoryTotal,
- memoryFree,
- memoryUtilization,
- }
- })
-
- resolve({
- cpu: { usage: cpuPercentage },
- gpu: gpuInfo,
- })
- }
- )
- } else {
- // Handle the case where gpuIds is empty
- resolve({
- cpu: { usage: cpuPercentage },
- gpu: [],
- })
- }
- } else {
- // Handle the case where run_mode is not 'gpu' or no GPUs are in use
- resolve({
- cpu: { usage: cpuPercentage },
- gpu: [],
- })
- }
- })
-
-/**
- * This will retrieve GPU information and persist settings.json
- * Will be called when the extension is loaded to turn on GPU acceleration if supported
- */
-export const updateNvidiaInfo = async () => {
- // ignore if macos
- if (process.platform === 'darwin') return
-
- try {
- JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
- } catch (error) {
- if (!existsSync(SETTINGS_DIR)) {
- mkdirSync(SETTINGS_DIR, {
- recursive: true,
- })
- }
- writeFileSync(GPU_INFO_FILE, JSON.stringify(DEFAULT_SETTINGS, null, 2))
- }
-
- await updateNvidiaDriverInfo()
- await updateGpuInfo()
-}
-
-const updateNvidiaDriverInfo = async () =>
- new Promise((resolve, reject) => {
- exec(
- 'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
- (error, stdout) => {
- const data: GpuSetting = JSON.parse(
- readFileSync(GPU_INFO_FILE, 'utf-8')
- )
-
- if (!error) {
- const firstLine = stdout.split('\n')[0].trim()
- data.nvidia_driver.exist = true
- data.nvidia_driver.version = firstLine
- } else {
- data.nvidia_driver.exist = false
- }
-
- writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
- resolve({})
- }
- )
- })
-
-const getGpuArch = (gpuName: string): string => {
- if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown'
-
- if (gpuName.includes('30')) return 'ampere'
- else if (gpuName.includes('40')) return 'ada'
- else return 'unknown'
-}
-
-const updateGpuInfo = async () =>
- new Promise((resolve, reject) => {
- let data: GpuSetting = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
-
- // Cuda
- if (data.vulkan === true) {
- // Vulkan
- exec(
- process.platform === 'win32'
- ? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
- : `${__dirname}/../bin/vulkaninfo --summary`,
- async (error, stdout) => {
- if (!error) {
- const output = stdout.toString()
-
- log(output)
- const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g
-
- const gpus: GpuSettingInfo[] = []
- let match
- while ((match = gpuRegex.exec(output)) !== null) {
- const id = match[1]
- const name = match[2]
- const arch = getGpuArch(name)
- gpus.push({ id, vram: '0', name, arch })
- }
- data.gpus = gpus
-
- if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
- data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
- }
-
- data = await updateCudaExistence(data)
- writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
- log(`[APP]::${JSON.stringify(data)}`)
- resolve({})
- } else {
- reject(error)
- }
- }
- )
- } else {
- exec(
- 'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
- async (error, stdout) => {
- if (!error) {
- log(`[SPECS]::${stdout}`)
-          // Get GPU info and track the GPU with the highest memory
- let highestVram = 0
- let highestVramId = '0'
- const gpus: GpuSettingInfo[] = stdout
- .trim()
- .split('\n')
- .map((line) => {
- let [id, vram, name] = line.split(', ')
- const arch = getGpuArch(name)
- vram = vram.replace(/\r/g, '')
- if (parseFloat(vram) > highestVram) {
- highestVram = parseFloat(vram)
- highestVramId = id
- }
- return { id, vram, name, arch }
- })
-
- data.gpus = gpus
- data.gpu_highest_vram = highestVramId
- } else {
- data.gpus = []
- data.gpu_highest_vram = undefined
- }
-
- if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
- data.gpus_in_use = data.gpu_highest_vram ? [data.gpu_highest_vram].filter(e => !!e) : []
- }
-
- data = await updateCudaExistence(data)
- console.log('[MONITORING]::Cuda info: ', data)
- writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
- log(`[APP]::${JSON.stringify(data)}`)
- resolve({})
- }
- )
- }
- })
-
-/**
- * Check if file exists in paths
- */
-const checkFileExistenceInPaths = (file: string, paths: string[]): boolean => {
- return paths.some((p) => existsSync(path.join(p, file)))
-}
-
-/**
- * Validate cuda for linux and windows
- */
-const updateCudaExistence = async (
- data: GpuSetting = DEFAULT_SETTINGS
-): Promise<GpuSetting> => {
- let filesCuda12: string[]
- let filesCuda11: string[]
- let paths: string[]
- let cudaVersion: string = ''
-
- if (process.platform === 'win32') {
- filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']
- filesCuda11 = ['cublas64_11.dll', 'cudart64_110.dll', 'cublasLt64_11.dll']
- paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []
- } else {
- filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']
- filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']
- paths = process.env.LD_LIBRARY_PATH
- ? process.env.LD_LIBRARY_PATH.split(path.delimiter)
- : []
- paths.push('/usr/lib/x86_64-linux-gnu/')
- }
-
- let cudaExists = filesCuda12.every(
- (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
- )
-
- if (!cudaExists) {
- cudaExists = filesCuda11.every(
- (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
- )
- if (cudaExists) {
- cudaVersion = '11'
- }
- } else {
- cudaVersion = '12'
- }
-
- data.cuda.exist = cudaExists
- data.cuda.version = cudaVersion
-
- console.debug(data.is_initial, data.gpus_in_use)
-
- if (cudaExists && data.is_initial && data.gpus_in_use.length > 0) {
- data.run_mode = 'gpu'
- }
-
- data.is_initial = false
-
- // Attempt to query CUDA using NVIDIA SMI
- if (!cudaExists) {
-    await new Promise<void>((resolve) => {
- exec('nvidia-smi', (error, stdout) => {
- if (!error) {
- const regex = /CUDA\s*Version:\s*(\d+\.\d+)/g
- const match = regex.exec(stdout)
- if (match && match[1]) {
- data.cuda.version = match[1]
- }
- }
- console.log('[MONITORING]::Finalized cuda info update: ', data)
- resolve()
- })
- })
- }
- return data
-}
-
-export const getOsInfo = (): OperatingSystemInfo => {
- const platform =
- SupportedPlatforms.find((p) => p === process.platform) || 'unknown'
-
- const osInfo: OperatingSystemInfo = {
- platform: platform,
- arch: process.arch,
- release: os.release(),
- machine: os.machine(),
- version: os.version(),
- totalMem: os.totalmem(),
- freeMem: os.freemem(),
- }
-
- return osInfo
-}
-
-export const registerLogger = ({ logEnabled, logCleaningInterval }) => {
- const logger = new FileLogger(logEnabled, logCleaningInterval)
- LoggerManager.instance().register(logger)
- logger.cleanLogs()
-}
-
-export const unregisterLogger = () => {
- LoggerManager.instance().unregister('file')
-}
-
-export const updateLogger = ({ logEnabled, logCleaningInterval }) => {
- const logger = LoggerManager.instance().loggers.get('file') as FileLogger
- if (logger && logEnabled !== undefined) logger.logEnabled = logEnabled
- if (logger && logCleaningInterval)
- logger.logCleaningInterval = logCleaningInterval
- // Rerun
- logger && logger.cleanLogs()
-}
diff --git a/server/cortex.json b/server/cortex.json
index 917cff354..0a9b83efd 100644
--- a/server/cortex.json
+++ b/server/cortex.json
@@ -5,77 +5,470 @@
"post": {
"operationId": "AssistantsController_create",
"summary": "Create assistant",
- "description": "Creates a new assistant.",
- "parameters": [],
+ "description": "Creates a new assistant with the specified configuration.",
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/CreateAssistantDto"
+ "type": "object",
+ "properties": {
+ "model": {
+ "type": "string",
+ "description": "The model identifier to use for the assistant."
+ },
+ "name": {
+ "type": "string",
+ "description": "The name of the assistant."
+ },
+ "description": {
+ "type": "string",
+ "description": "The description of the assistant."
+ },
+ "instructions": {
+ "type": "string",
+ "description": "Instructions for the assistant's behavior."
+ },
+ "tools": {
+ "type": "array",
+ "description": "A list of tools enabled on the assistant. Maximum of 128 tools.",
+ "items": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "enum": [
+ "code_interpreter",
+ "file_search",
+ "function"
+ ]
+ }
+ }
+ }
+ },
+ "tool_resources": {
+ "type": "object",
+ "description": "Resources used by the assistant's tools.",
+ "properties": {
+ "code_interpreter": {
+ "type": "object"
+ },
+ "file_search": {
+ "type": "object"
+ }
+ }
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Set of key-value pairs for the assistant.",
+ "additionalProperties": true
+ },
+ "temperature": {
+ "type": "number",
+ "format": "float",
+ "description": "Temperature parameter for response generation."
+ },
+ "top_p": {
+ "type": "number",
+ "format": "float",
+ "description": "Top p parameter for response generation."
+ },
+ "response_format": {
+ "oneOf": [
+ {
+ "type": "string",
+ "enum": ["auto"]
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ },
+ "required": ["model"]
}
}
}
},
- "responses": {
- "201": {
- "description": "The assistant has been successfully created."
- }
- },
- "tags": ["Assistants"]
- },
- "get": {
- "operationId": "AssistantsController_findAll",
- "summary": "List assistants",
- "description": "Returns a list of assistants.",
- "parameters": [
- {
- "name": "limit",
- "required": false,
- "in": "query",
- "description": "A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.",
- "schema": {
- "type": "number"
- }
- },
- {
- "name": "order",
- "required": false,
- "in": "query",
- "description": "Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order.",
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "after",
- "required": false,
- "in": "query",
- "description": "A cursor for use in pagination. after is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.",
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "before",
- "required": false,
- "in": "query",
- "description": "A cursor for use in pagination. before is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list.",
- "schema": {
- "type": "string"
- }
- }
- ],
"responses": {
"200": {
"description": "Ok",
"content": {
"application/json": {
"schema": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/AssistantEntity"
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The unique identifier of the assistant."
+ },
+ "object": {
+ "type": "string",
+ "enum": ["assistant"],
+ "description": "The object type, which is always 'assistant'."
+ },
+ "created_at": {
+ "type": "integer",
+ "description": "Unix timestamp (in seconds) of when the assistant was created."
+ },
+ "model": {
+ "type": "string",
+ "description": "The model identifier used by the assistant."
+ },
+ "name": {
+ "type": "string",
+ "description": "The name of the assistant."
+ },
+ "description": {
+ "type": "string",
+ "description": "The description of the assistant."
+ },
+ "instructions": {
+ "type": "string",
+ "description": "Instructions for the assistant's behavior."
+ },
+ "tools": {
+ "type": "array",
+ "description": "A list of tools enabled on the assistant.",
+ "items": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "enum": [
+ "code_interpreter",
+ "file_search",
+ "function"
+ ]
+ }
+ }
+ }
+ },
+ "tool_resources": {
+ "type": "object",
+ "description": "Resources used by the assistant's tools.",
+ "properties": {
+ "code_interpreter": {
+ "type": "object"
+ },
+ "file_search": {
+ "type": "object"
+ }
+ }
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Set of key-value pairs that can be attached to the assistant.",
+ "additionalProperties": true
+ },
+ "temperature": {
+ "type": "number",
+ "format": "float",
+ "description": "Temperature parameter for response generation."
+ },
+ "top_p": {
+ "type": "number",
+ "format": "float",
+ "description": "Top p parameter for response generation."
+ },
+ "response_format": {
+ "oneOf": [
+ {
+ "type": "string",
+ "enum": ["auto"]
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ },
+ "required": [
+ "id",
+ "object",
+ "created_at",
+ "model",
+ "metadata"
+ ]
+ }
+ }
+ }
+ }
+ },
+ "tags": ["Assistants"]
+ },
+ "patch": {
+ "operationId": "AssistantsController_update",
+ "summary": "Update assistant",
+ "description": "Updates an assistant. Requires at least one modifiable field.",
+ "parameters": [
+ {
+ "name": "id",
+ "required": true,
+ "in": "path",
+ "description": "The unique identifier of the assistant.",
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "OpenAI-Beta",
+ "required": true,
+ "in": "header",
+ "description": "Beta feature header.",
+ "schema": {
+ "type": "string",
+ "enum": ["assistants=v2"]
+ }
+ }
+ ],
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "model": {
+ "type": "string",
+ "description": "The model identifier to use for the assistant."
+ },
+ "name": {
+ "type": "string",
+ "description": "The name of the assistant."
+ },
+ "description": {
+ "type": "string",
+ "description": "The description of the assistant."
+ },
+ "instructions": {
+ "type": "string",
+ "description": "Instructions for the assistant's behavior."
+ },
+ "tools": {
+ "type": "array",
+ "description": "A list of tools enabled on the assistant. Maximum of 128 tools.",
+ "items": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "enum": [
+ "code_interpreter",
+ "file_search",
+ "function"
+ ]
+ }
+ }
+ }
+ },
+ "tool_resources": {
+ "type": "object",
+ "description": "Resources used by the assistant's tools.",
+ "properties": {
+ "code_interpreter": {
+ "type": "object"
+ },
+ "file_search": {
+ "type": "object"
+ }
+ }
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Set of key-value pairs for the assistant.",
+ "additionalProperties": true
+ },
+ "temperature": {
+ "type": "number",
+ "format": "float",
+ "description": "Temperature parameter for response generation."
+ },
+ "top_p": {
+ "type": "number",
+ "format": "float",
+ "description": "Top p parameter for response generation."
+ },
+ "response_format": {
+ "oneOf": [
+ {
+ "type": "string",
+ "enum": ["auto"]
+ },
+ {
+ "type": "object"
+ }
+ ]
}
+ },
+ "minProperties": 1
+ }
+ }
+ }
+ },
+ "responses": {
+ "200": {
+ "description": "Ok",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The unique identifier of the assistant."
+ },
+ "object": {
+ "type": "string",
+ "enum": ["assistant"],
+ "description": "The object type, which is always 'assistant'."
+ },
+ "created_at": {
+ "type": "integer",
+ "description": "Unix timestamp (in seconds) of when the assistant was created."
+ },
+ "model": {
+ "type": "string",
+ "description": "The model identifier used by the assistant."
+ },
+ "name": {
+ "type": "string",
+ "description": "The name of the assistant."
+ },
+ "description": {
+ "type": "string",
+ "description": "The description of the assistant."
+ },
+ "instructions": {
+ "type": "string",
+ "description": "Instructions for the assistant's behavior."
+ },
+ "tools": {
+ "type": "array",
+ "description": "A list of tools enabled on the assistant.",
+ "items": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "enum": [
+ "code_interpreter",
+ "file_search",
+ "function"
+ ]
+ }
+ }
+ }
+ },
+ "tool_resources": {
+ "type": "object",
+ "description": "Resources used by the assistant's tools.",
+ "properties": {
+ "code_interpreter": {
+ "type": "object"
+ },
+ "file_search": {
+ "type": "object"
+ }
+ }
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Set of key-value pairs that can be attached to the assistant.",
+ "additionalProperties": true
+ },
+ "temperature": {
+ "type": "number",
+ "format": "float",
+ "description": "Temperature parameter for response generation."
+ },
+ "top_p": {
+ "type": "number",
+ "format": "float",
+ "description": "Top p parameter for response generation."
+ },
+ "response_format": {
+ "oneOf": [
+ {
+ "type": "string",
+ "enum": ["auto"]
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ },
+ "required": [
+ "id",
+ "object",
+ "created_at",
+ "model",
+ "metadata"
+ ]
+ }
+ }
+ }
+ }
+ },
+ "tags": ["Assistants"]
+ },
+ "get": {
+ "operationId": "AssistantsController_list",
+ "summary": "List assistants",
+ "description": "Returns a list of assistants.",
+ "responses": {
+ "200": {
+ "description": "Ok",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "object": {
+ "type": "string",
+ "enum": ["list"],
+ "description": "The object type, which is always 'list' for a list response."
+ },
+ "data": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The unique identifier of the assistant."
+ },
+ "object": {
+ "type": "string",
+ "enum": ["assistant"],
+ "description": "The object type, which is always 'assistant'."
+ },
+ "created_at": {
+ "type": "integer",
+ "description": "Unix timestamp (in seconds) of when the assistant was created."
+ },
+ "model": {
+ "type": "string",
+ "description": "The model identifier used by the assistant."
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Set of key-value pairs that can be attached to the assistant.",
+ "additionalProperties": true
+ }
+ },
+ "required": [
+ "id",
+ "object",
+ "created_at",
+ "model",
+ "metadata"
+ ]
+ }
+ }
+ },
+ "required": ["object", "data"]
}
}
}
@@ -88,7 +481,77 @@
"get": {
"operationId": "AssistantsController_findOne",
"summary": "Get assistant",
- "description": "Retrieves a specific assistant defined by an assistant's `id`.",
+ "description": "Retrieves a specific assistant by ID.",
+ "parameters": [
+ {
+ "name": "id",
+ "required": true,
+ "in": "path",
+ "description": "The unique identifier of the assistant.",
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "OpenAI-Beta",
+ "required": true,
+ "in": "header",
+ "description": "Beta feature header.",
+ "schema": {
+ "type": "string",
+ "enum": ["assistants=v2"]
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Ok",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The unique identifier of the assistant."
+ },
+ "object": {
+ "type": "string",
+ "enum": ["assistant"],
+ "description": "The object type, which is always 'assistant'."
+ },
+ "created_at": {
+ "type": "integer",
+ "description": "Unix timestamp (in seconds) of when the assistant was created."
+ },
+ "model": {
+ "type": "string",
+ "description": "The model identifier used by the assistant."
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Set of key-value pairs attached to the assistant.",
+ "additionalProperties": true
+ }
+ },
+ "required": [
+ "id",
+ "object",
+ "created_at",
+ "model",
+ "metadata"
+ ]
+ }
+ }
+ }
+ }
+ },
+ "tags": ["Assistants"]
+ },
+ "delete": {
+ "operationId": "AssistantsController_remove",
+ "summary": "Delete assistant",
+ "description": "Deletes a specific assistant by ID.",
"parameters": [
{
"name": "id",
@@ -106,36 +569,24 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/AssistantEntity"
- }
- }
- }
- }
- },
- "tags": ["Assistants"]
- },
- "delete": {
- "operationId": "AssistantsController_remove",
- "summary": "Delete assistant",
- "description": "Deletes a specific assistant defined by an assistant's `id`.",
- "parameters": [
- {
- "name": "id",
- "required": true,
- "in": "path",
- "description": "The unique identifier of the assistant.",
- "schema": {
- "type": "string"
- }
- }
- ],
- "responses": {
- "200": {
- "description": "The assistant has been successfully deleted.",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/DeleteAssistantResponseDto"
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The unique identifier of the deleted assistant."
+ },
+ "object": {
+ "type": "string",
+ "enum": ["assistant.deleted"],
+ "description": "The object type for a deleted assistant."
+ },
+ "deleted": {
+ "type": "boolean",
+ "enum": [true],
+ "description": "Indicates the assistant was successfully deleted."
+ }
+ },
+ "required": ["id", "object", "deleted"]
}
}
}
@@ -2199,6 +2650,84 @@
"tags": ["Engines"]
}
},
+ "/engines/{name}/releases/{version}": {
+ "get": {
+ "summary": "List variants for a specific engine version",
+ "description": "Lists all available variants (builds) for a specific version of an engine. Variants can include different CPU architectures (AVX, AVX2, AVX512), GPU support (CUDA, Vulkan), and operating systems (Windows, Linux, macOS).",
+ "parameters": [
+ {
+ "name": "name",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"],
+ "default": "llama-cpp"
+ },
+ "description": "The type of engine"
+ },
+ {
+ "name": "version",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string"
+ },
+ "description": "The version of the engine"
+ },
+ {
+ "name": "show",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "string",
+ "enum": ["all", "compatible"],
+ "default": "all"
+ },
+ "description": "Filter the variants list. Use 'compatible' to show only variants compatible with the current system, or 'all' to show all available variants."
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successfully retrieved variants list",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "The name of the variant, including OS, architecture, and capabilities",
+ "example": "linux-amd64-avx-cuda-11-7"
+ },
+ "created_at": {
+ "type": "string",
+ "format": "date-time",
+ "description": "Creation timestamp of the variant",
+ "example": "2024-11-13T04:51:16Z"
+ },
+ "size": {
+ "type": "integer",
+ "description": "Size of the variant in bytes",
+ "example": 151224604
+ },
+ "download_count": {
+ "type": "integer",
+ "description": "Number of times this variant has been downloaded",
+ "example": 0
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "tags": ["Engines"]
+ }
+ },
"/engines/{name}/releases/latest": {
"get": {
"summary": "Get latest release",
@@ -2314,7 +2843,7 @@
"get_models_url": {
"type": "string",
"description": "The URL to get models",
- "example": "https://api.openai.com/v1/models"
+ "example": "https://api.openai.com/models"
}
}
}
@@ -3378,6 +3907,7 @@
"Files",
"Hardware",
"Events",
+ "Assistants",
"Threads",
"Messages",
"Pulling Models",
@@ -4858,8 +5388,8 @@
"engine",
"version",
"inference_params",
- "TransformReq",
- "TransformResp",
+ "transform_req",
+ "transform_resp",
"metadata"
],
"properties": {
@@ -4867,9 +5397,9 @@
"type": "string",
"description": "The identifier of the model."
},
- "api_key_template": {
+ "header_template": {
"type": "string",
- "description": "Template for the API key header."
+ "description": "Template for the header."
},
"engine": {
"type": "string",
@@ -4902,7 +5432,7 @@
}
}
},
- "TransformReq": {
+ "transform_req": {
"type": "object",
"properties": {
"get_models": {
@@ -4924,7 +5454,7 @@
}
}
},
- "TransformResp": {
+ "transform_resp": {
"type": "object",
"properties": {
"chat_completions": {
@@ -5632,9 +6162,9 @@
"description": "Number of GPU layers.",
"example": 33
},
- "api_key_template": {
+ "header_template": {
"type": "string",
- "description": "Template for the API key header."
+ "description": "Template for the header."
},
"version": {
"type": "string",
diff --git a/web/containers/AutoLink/index.tsx b/web/containers/AutoLink/index.tsx
index 66c84f7f7..0f10f478a 100644
--- a/web/containers/AutoLink/index.tsx
+++ b/web/containers/AutoLink/index.tsx
@@ -10,23 +10,25 @@ const AutoLink = ({ text }: Props) => {
return (
<>
- {text.split(delimiter).map((word) => {
- const match = word.match(delimiter)
- if (match) {
- const url = match[0]
- return (
-          <a href={url}>
-            {url}
-          </a>
- )
- }
- return word
- })}
+ {text &&
+ typeof text === 'string' &&
+ text.split(delimiter).map((word) => {
+ const match = word.match(delimiter)
+ if (match) {
+ const url = match[0]
+            return (
+              <a href={url}>
+                {url}
+              </a>
+
+ )
+ }
+ return word
+ })}
>
)
}
diff --git a/web/containers/ErrorMessage/index.tsx b/web/containers/ErrorMessage/index.tsx
index cd9334283..ab5a35d32 100644
--- a/web/containers/ErrorMessage/index.tsx
+++ b/web/containers/ErrorMessage/index.tsx
@@ -23,7 +23,13 @@ import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom'
import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom'
-const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
+const ErrorMessage = ({
+ message,
+ errorComponent,
+}: {
+ message?: ThreadMessage
+ errorComponent?: React.ReactNode
+}) => {
const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom)
const setMainState = useSetAtom(mainViewStateAtom)
const setSelectedSettingScreen = useSetAtom(selectedSettingAtom)
@@ -50,7 +56,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
const getErrorTitle = () => {
const engine = getEngine()
- switch (message.metadata?.error_code) {
+ switch (message?.metadata?.error_code) {
case ErrorCode.InvalidApiKey:
case ErrorCode.AuthenticationError:
return (
@@ -61,7 +67,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
className="font-medium text-[hsla(var(--app-link))] underline"
onClick={() => {
setMainState(MainViewState.Settings)
- engine?.name && setSelectedSettingScreen(engine.name)
+ setSelectedSettingScreen(activeAssistant?.model?.engine ?? '')
}}
>
Settings
@@ -77,7 +83,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
data-testid="passthrough-error-message"
className="first-letter:uppercase"
>
- {message.content[0]?.text?.value === 'Failed to fetch' &&
+ {message?.content[0]?.text?.value === 'Failed to fetch' &&
engine &&
engine?.name !== InferenceEngine.cortex_llamacpp ? (
@@ -89,6 +95,9 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
{message?.content[0]?.text?.value && (
)}
+ {!message?.content[0]?.text?.value && (
+ Something went wrong. Please try again.
+ )}
>
)}
@@ -100,12 +109,15 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
-
Error
-
-
+
+
+ Error
+
+
+
setModalTroubleShooting(true)}
@@ -116,7 +128,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
{copied ? (
@@ -138,10 +150,10 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
- {getErrorTitle()}
+ {errorComponent ? errorComponent : getErrorTitle()}
diff --git a/web/containers/Layout/BottomPanel/SystemMonitor/SystemMonitor.test.tsx b/web/containers/Layout/BottomPanel/SystemMonitor/SystemMonitor.test.tsx
index 2eba5edbb..ca336b0e5 100644
--- a/web/containers/Layout/BottomPanel/SystemMonitor/SystemMonitor.test.tsx
+++ b/web/containers/Layout/BottomPanel/SystemMonitor/SystemMonitor.test.tsx
@@ -87,7 +87,7 @@ describe('SystemMonitor', () => {
expect(screen.getByText('Running Models')).toBeInTheDocument()
expect(screen.getByText('App Log')).toBeInTheDocument()
- expect(screen.getByText('7.45/14.90 GB')).toBeInTheDocument()
+ expect(screen.getByText('7.45GB / 14.90GB')).toBeInTheDocument()
expect(screen.getByText('30%')).toBeInTheDocument()
})
diff --git a/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx b/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx
index f47dfaeb7..d9a0b289a 100644
--- a/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx
+++ b/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx
@@ -134,8 +134,8 @@ const SystemMonitor = () => {
Memory
- {toGigabytes(usedRam, { hideUnit: true })}/
- {toGigabytes(totalRam, { hideUnit: true })} GB
+ {toGigabytes(usedRam, { hideUnit: true })}GB /{' '}
+ {toGigabytes(totalRam, { hideUnit: true })}GB
@@ -149,41 +149,43 @@ const SystemMonitor = () => {
{gpus.length > 0 && (
- {gpus.map((gpu, index) => {
- const gpuUtilization = utilizedMemory(
- gpu.memoryFree,
- gpu.memoryTotal
- )
- return (
-
-
-
- {gpu.name}
-
-
-
-
- {gpu.memoryTotal - gpu.memoryFree}/
- {gpu.memoryTotal}
-
-
MB
+ {gpus
+ .filter((gpu) => gpu.activated === true)
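+            // Only report GPUs that are activated in settings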
+ .map((gpu, index) => {
+ const gpuUtilization = utilizedMemory(
+ gpu.free_vram,
+ gpu.total_vram
+ )
+ return (
+
+
+
+ {gpu.name}
+
+
+
+
+ {gpu.total_vram - gpu.free_vram}/
+ {gpu.total_vram}
+
+ MB
+
-
-
-
-
- {gpuUtilization}%
-
+
+
+
+ {gpuUtilization}%
+
+
-
- )
- })}
+ )
+ })}
)}
diff --git a/web/containers/Layout/index.tsx b/web/containers/Layout/index.tsx
index 18c0edcab..d29647029 100644
--- a/web/containers/Layout/index.tsx
+++ b/web/containers/Layout/index.tsx
@@ -25,6 +25,8 @@ import ImportModelOptionModal from '@/screens/Settings/ImportModelOptionModal'
import ImportingModelModal from '@/screens/Settings/ImportingModelModal'
import SelectingModelModal from '@/screens/Settings/SelectingModelModal'
+import { getAppDistinctId, updateDistinctId } from '@/utils/settings'
+
import LoadingModal from '../LoadingModal'
import MainViewContainer from '../MainViewContainer'
@@ -96,8 +98,16 @@ const BaseLayout = () => {
return properties
},
})
- posthog.opt_in_capturing()
- posthog.register({ app_version: VERSION })
+      // Attempt to restore the distinct ID from app global settings
+ getAppDistinctId()
+ .then((id) => {
+ if (id) posthog.identify(id)
+ })
+ .finally(() => {
+ posthog.opt_in_capturing()
+ posthog.register({ app_version: VERSION })
+ updateDistinctId(posthog.get_distinct_id())
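+          // Persist the distinct ID so future sessions reuse it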
+ })
} else {
posthog.opt_out_capturing()
}
diff --git a/web/containers/ModelDropdown/index.tsx b/web/containers/ModelDropdown/index.tsx
index 6d2cc0b23..a702d12f7 100644
--- a/web/containers/ModelDropdown/index.tsx
+++ b/web/containers/ModelDropdown/index.tsx
@@ -28,6 +28,8 @@ import ModelLabel from '@/containers/ModelLabel'
import SetupRemoteModel from '@/containers/SetupRemoteModel'
+import { useActiveModel } from '@/hooks/useActiveModel'
+
import { useCreateNewThread } from '@/hooks/useCreateNewThread'
import useDownloadModel from '@/hooks/useDownloadModel'
import { modelDownloadStateAtom } from '@/hooks/useDownloadState'
@@ -40,7 +42,7 @@ import useUpdateModelParameters from '@/hooks/useUpdateModelParameters'
import { formatDownloadPercentage, toGigabytes } from '@/utils/converter'
import { manualRecommendationModel } from '@/utils/model'
-import { getLogoEngine } from '@/utils/modelEngine'
+import { getLogoEngine, getTitleByEngine } from '@/utils/modelEngine'
import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom'
import {
@@ -93,6 +95,7 @@ const ModelDropdown = ({
const { updateModelParameter } = useUpdateModelParameters()
const searchInputRef = useRef
(null)
const configuredModels = useAtomValue(configuredModelsAtom)
+ const { stopModel } = useActiveModel()
const featuredModels = configuredModels.filter(
(x) =>
@@ -226,6 +229,7 @@ const ModelDropdown = ({
const model = downloadedModels.find((m) => m.id === modelId)
setSelectedModel(model)
setOpen(false)
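+ // Stop the currently loaded model so switching does not leave it running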
+ stopModel()
if (activeThread) {
// Change assistant tools based on whether the model supports RAG
@@ -248,18 +252,13 @@ const ModelDropdown = ({
],
})
- const defaultContextLength = Math.min(
- 8192,
- model?.settings.ctx_len ?? 8192
- )
-
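+ // Clamp to 8192 only when the model declares ctx_len; otherwise leave the override unset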
+ const contextLength = model?.settings.ctx_len
+ ? Math.min(8192, model?.settings.ctx_len ?? 8192)
+ : undefined
const overriddenParameters = {
- ctx_len: model?.settings.ctx_len ? defaultContextLength : undefined,
- max_tokens: defaultContextLength
- ? Math.min(
- model?.parameters.max_tokens ?? 8192,
- defaultContextLength
- )
+ ctx_len: contextLength,
+ max_tokens: contextLength
+ ? Math.min(model?.parameters.max_tokens ?? 8192, contextLength)
: model?.parameters.max_tokens,
}
@@ -289,6 +288,7 @@ const ModelDropdown = ({
updateThreadMetadata,
setThreadModelParams,
updateModelParameter,
+ stopModel,
]
)
@@ -429,7 +429,7 @@ const ModelDropdown = ({
/>
)}
- {engine.name}
+ {getTitleByEngine(engine.name)}
@@ -475,7 +475,7 @@ const ModelDropdown = ({
>
{model.name}
@@ -549,75 +549,82 @@ const ModelDropdown = ({
(c) => c.id === model.id
)
return (
-
{
- if (!isConfigured && engine.type === 'remote')
- return null
- if (isDownloaded) {
- onClickModelItem(model.id)
- }
- }}
- >
-
-
+ {isDownloaded && (
+
- {model.name}
-
-
-
-
- {!isDownloaded && (
-
- {toGigabytes(model.metadata?.size)}
-
- )}
- {!isDownloading && !isDownloaded ? (
-
- downloadModel(
- model.sources[0].url,
- model.id
- )
+ onClick={() => {
+ if (
+ !isConfigured &&
+ engine.type === 'remote'
+ )
+ return null
+ if (isDownloaded) {
+ onClickModelItem(model.id)
}
- />
- ) : (
- Object.values(downloadStates)
- .filter((x) => x.modelId === model.id)
- .map((item) => (
-
+
+
+ {!isDownloaded && (
+
+ {toGigabytes(model.metadata?.size)}
+
+ )}
+ {!isDownloading && !isDownloaded ? (
+
+ downloadModel(
+ model.sources[0].url,
+ model.id
+ )
}
- size={100}
/>
- ))
- )}
-
-
+ ) : (
+ Object.values(downloadStates)
+ .filter((x) => x.modelId === model.id)
+ .map((item) => (
+
+ ))
+ )}
+
+
+ )}
+ >
)
})}
diff --git a/web/containers/ModelLabel/index.tsx b/web/containers/ModelLabel/index.tsx
index 564b7edf8..c7c64b210 100644
--- a/web/containers/ModelLabel/index.tsx
+++ b/web/containers/ModelLabel/index.tsx
@@ -29,15 +29,20 @@ const ModelLabel = ({ size, compact }: Props) => {
const { settings } = useSettings()
const getLabel = (size: number) => {
- const minimumRamModel = size * 1.25
- const availableRam =
- settings?.run_mode === 'gpu'
- ? availableVram * 1000000 // MB to bytes
- : totalRam - usedRam + (activeModel?.metadata?.size ?? 0)
+ const minimumRamModel = (size * 1.25) / (1024 * 1024)
+
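+ // Use VRAM when any GPU is activated; otherwise estimate free system RAM, discounting the active model's footprint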
+ const availableRam = settings?.gpus?.some((gpu) => gpu.activated)
+ ? availableVram * 1000000 // MB to bytes
+ : totalRam -
+ (usedRam +
+ (activeModel?.metadata?.size
+ ? (activeModel.metadata.size * 1.25) / (1024 * 1024)
+ : 0))
+
if (minimumRamModel > totalRam) {
return (
gpu.activated) ? 'VRAM' : 'RAM'}
compact={compact}
/>
)
diff --git a/web/containers/Providers/ModelHandler.tsx b/web/containers/Providers/ModelHandler.tsx
index 2c027539e..cceb88a4c 100644
--- a/web/containers/Providers/ModelHandler.tsx
+++ b/web/containers/Providers/ModelHandler.tsx
@@ -143,8 +143,7 @@ export default function ModelHandler() {
return
}
- // The thread title should not be updated if the message is less than 10 words
- // And no new line character is present
+ // No new line character should be present in the title
// And non-alphanumeric characters should be removed
if (messageContent.includes('\n')) {
messageContent = messageContent.replace(/\n/g, ' ')
diff --git a/web/containers/ServerLogs/index.tsx b/web/containers/ServerLogs/index.tsx
index 2e978bd23..b89a4c237 100644
--- a/web/containers/ServerLogs/index.tsx
+++ b/web/containers/ServerLogs/index.tsx
@@ -93,205 +93,211 @@ const ServerLogs = (props: ServerLogsProps) => {
}, [listRef.current?.scrollHeight, isUserManuallyScrollingUp, logs])
return (
-
- {withCopy && (
-
-
-
onRevealInFinder('Logs')}
- >
-
- <>
-
- Open
- >
-
-
-
{
- clipboard.copy(logs.slice(-100).join('\n') ?? '')
- }}
- >
-
- {clipboard.copied ? (
- <>
-
- Copying...
- >
- ) : (
- <>
-
- Copy All
- >
- )}
-
-
-
-
- )}
-
- {logs.length > 0 ? (
-
- {logs.slice(-limit).map((log, i) => {
- return (
-
- {log}
-
- )
- })}
-
- ) : (
-
-
-
-
-
+
+ {withCopy && (
+
+
+
onRevealInFinder('Logs')}
>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Empty logs
+
+ <>
+
+ Open
+ >
+
+
+
{
+ clipboard.copy(logs.slice(-100).join('\n') ?? '')
+ }}
+ >
+
+ {clipboard.copied ? (
+ <>
+
+ Copying...
+ >
+ ) : (
+ <>
+
+ Copy All
+ >
+ )}
+
+
+
)}
-
+
+
+ {logs.length > 0 ? (
+
+ {logs.slice(-limit).map((log, i) => {
+ return (
+
+ {log}
+
+ )
+ })}
+
+ ) : (
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Empty logs
+
+
+ )}
+
+
+ >
)
}
diff --git a/web/containers/SliderRightPanel/index.tsx b/web/containers/SliderRightPanel/index.tsx
index 3fad10212..5022845c9 100644
--- a/web/containers/SliderRightPanel/index.tsx
+++ b/web/containers/SliderRightPanel/index.tsx
@@ -73,7 +73,7 @@ const SliderRightPanel = ({
trigger={
diff --git a/web/helpers/atoms/App.atom.ts b/web/helpers/atoms/App.atom.ts
--- a/web/helpers/atoms/App.atom.ts
+++ b/web/helpers/atoms/App.atom.ts
export const mainViewStateAtom = atom<MainViewState>(MainViewState.Thread)
export const defaultJanDataFolderAtom = atom('')
+export const LocalEngineDefaultVariantAtom = atom('')
+
const SHOW_RIGHT_PANEL = 'showRightPanel'
// Store panel atom
diff --git a/web/hooks/useCreateNewThread.ts b/web/hooks/useCreateNewThread.ts
index 4901b9846..57ceeb385 100644
--- a/web/hooks/useCreateNewThread.ts
+++ b/web/hooks/useCreateNewThread.ts
@@ -82,24 +82,18 @@ export const useCreateNewThread = () => {
}
// Default context length is 8192
- const defaultContextLength = Math.min(
- 8192,
- defaultModel?.settings?.ctx_len ?? 8192
- )
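+ // Leave ctx_len unset when the model does not declare one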
+ const contextLength = defaultModel?.settings?.ctx_len
+ ? Math.min(8192, defaultModel?.settings?.ctx_len)
+ : undefined
const overriddenSettings = {
- ctx_len: defaultModel?.settings?.ctx_len
- ? Math.min(8192, defaultModel?.settings?.ctx_len)
- : undefined,
+ ctx_len: contextLength,
}
// Use ctx length by default
const overriddenParameters = {
- max_tokens: defaultContextLength
- ? Math.min(
- defaultModel?.parameters?.max_tokens ?? 8192,
- defaultContextLength
- )
+ max_tokens: contextLength
+ ? Math.min(defaultModel?.parameters?.max_tokens ?? 8192, contextLength)
: defaultModel?.parameters?.max_tokens,
}
diff --git a/web/hooks/useFactoryReset.ts b/web/hooks/useFactoryReset.ts
index da2e15b03..7344b2eb1 100644
--- a/web/hooks/useFactoryReset.ts
+++ b/web/hooks/useFactoryReset.ts
@@ -58,6 +58,7 @@ export default function useFactoryReset() {
const configuration: AppConfiguration = {
data_folder: defaultJanDataFolder,
quick_ask: appConfiguration?.quick_ask ?? false,
+ distinct_id: appConfiguration?.distinct_id,
}
await window.core?.api?.updateAppConfiguration(configuration)
}
diff --git a/web/hooks/useGetSystemResources.test.ts b/web/hooks/useGetSystemResources.test.ts
index 10e539e07..78392b612 100644
--- a/web/hooks/useGetSystemResources.test.ts
+++ b/web/hooks/useGetSystemResources.test.ts
@@ -21,7 +21,7 @@ jest.mock('jotai', () => ({
describe('useGetSystemResources', () => {
const mockMonitoringExtension = {
- getResourcesInfo: jest.fn(),
+ getHardware: jest.fn(),
getCurrentLoad: jest.fn(),
}
@@ -38,17 +38,17 @@ describe('useGetSystemResources', () => {
})
it('should fetch system resources on initial render', async () => {
- mockMonitoringExtension.getResourcesInfo.mockResolvedValue({
- mem: { usedMemory: 4000, totalMemory: 8000 },
+ mockMonitoringExtension.getHardware.mockResolvedValue({
+ cpu: { usage: 50 },
+ ram: { available: 4000, total: 8000 },
})
mockMonitoringExtension.getCurrentLoad.mockResolvedValue({
- cpu: { usage: 50 },
gpu: [],
})
const { result } = renderHook(() => useGetSystemResources())
- expect(mockMonitoringExtension.getResourcesInfo).toHaveBeenCalledTimes(1)
+ expect(mockMonitoringExtension.getHardware).toHaveBeenCalledTimes(1)
})
it('should start watching system resources when watch is called', () => {
@@ -58,14 +58,14 @@ describe('useGetSystemResources', () => {
result.current.watch()
})
- expect(mockMonitoringExtension.getResourcesInfo).toHaveBeenCalled()
+ expect(mockMonitoringExtension.getHardware).toHaveBeenCalled()
// Fast-forward time by 2 seconds
act(() => {
jest.advanceTimersByTime(2000)
})
- expect(mockMonitoringExtension.getResourcesInfo).toHaveBeenCalled()
+ expect(mockMonitoringExtension.getHardware).toHaveBeenCalled()
})
it('should stop watching when stopWatching is called', () => {
@@ -85,7 +85,7 @@ describe('useGetSystemResources', () => {
})
// Expect no additional calls after stopping
- expect(mockMonitoringExtension.getResourcesInfo).toHaveBeenCalled()
+ expect(mockMonitoringExtension.getHardware).toHaveBeenCalled()
})
it('should not fetch resources if monitoring extension is not available', async () => {
@@ -97,7 +97,7 @@ describe('useGetSystemResources', () => {
result.current.getSystemResources()
})
- expect(mockMonitoringExtension.getResourcesInfo).not.toHaveBeenCalled()
+ expect(mockMonitoringExtension.getHardware).not.toHaveBeenCalled()
expect(mockMonitoringExtension.getCurrentLoad).not.toHaveBeenCalled()
})
})
diff --git a/web/hooks/useGetSystemResources.ts b/web/hooks/useGetSystemResources.ts
index a05a6a710..e40100a55 100644
--- a/web/hooks/useGetSystemResources.ts
+++ b/web/hooks/useGetSystemResources.ts
@@ -1,6 +1,7 @@
+/* eslint-disable @typescript-eslint/no-explicit-any */
import { useCallback, useEffect, useState } from 'react'
-import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core'
+import { ExtensionTypeEnum, HardwareManagementExtension } from '@janhq/core'
import { useSetAtom } from 'jotai'
@@ -20,58 +21,62 @@ export default function useGetSystemResources() {
NodeJS.Timeout | number | undefined
>(undefined)
- const setTotalRam = useSetAtom(totalRamAtom)
const setGpus = useSetAtom(gpusAtom)
- const setUsedRam = useSetAtom(usedRamAtom)
const setCpuUsage = useSetAtom(cpuUsageAtom)
const setTotalNvidiaVram = useSetAtom(nvidiaTotalVramAtom)
const setAvailableVram = useSetAtom(availableVramAtom)
+ const setUsedRam = useSetAtom(usedRamAtom)
+ const setTotalRam = useSetAtom(totalRamAtom)
const setRamUtilitized = useSetAtom(ramUtilitizedAtom)
const getSystemResources = useCallback(async () => {
if (
- !extensionManager.get<MonitoringExtension>(
- ExtensionTypeEnum.SystemMonitoring
+ !extensionManager.get<HardwareManagementExtension>(
+ ExtensionTypeEnum.Hardware
)
) {
return
}
- const monitoring = extensionManager.get<MonitoringExtension>(
- ExtensionTypeEnum.SystemMonitoring
- )
- const resourceInfor = await monitoring?.getResourcesInfo()
- const currentLoadInfor = await monitoring?.getCurrentLoad()
- if (resourceInfor?.mem?.usedMemory) setUsedRam(resourceInfor.mem.usedMemory)
- if (resourceInfor?.mem?.totalMemory)
- setTotalRam(resourceInfor.mem.totalMemory)
+ const hardwareExtension = extensionManager.get<HardwareManagementExtension>(
+ ExtensionTypeEnum.Hardware
+ )
+
+ const hardwareInfo = await hardwareExtension?.getHardware()
+
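+ // The hardware API reports only total and available RAM, so derive the used amount here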
+ const usedMemory =
+ Number(hardwareInfo?.ram.total) - Number(hardwareInfo?.ram.available)
+
+ if (hardwareInfo?.ram?.total && hardwareInfo?.ram?.available)
+ setUsedRam(Number(usedMemory))
+
+ if (hardwareInfo?.ram?.total) setTotalRam(hardwareInfo.ram.total)
const ramUtilitized =
- ((resourceInfor?.mem?.usedMemory ?? 0) /
- (resourceInfor?.mem?.totalMemory ?? 1)) *
- 100
+ ((Number(usedMemory) || 0) / (hardwareInfo?.ram.total ?? 1)) * 100
+
setRamUtilitized(Math.round(ramUtilitized))
- setCpuUsage(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
+ setCpuUsage(Math.round(hardwareInfo?.cpu.usage ?? 0))
- const gpus = currentLoadInfor?.gpu ?? []
- setGpus(gpus)
+ const gpus = hardwareInfo?.gpus ?? []
+ setGpus(gpus as any)
let totalNvidiaVram = 0
if (gpus.length > 0) {
totalNvidiaVram = gpus.reduce(
- (total: number, gpu: { memoryTotal: string }) =>
- total + Number(gpu.memoryTotal),
+ (total: number, gpu: { total_vram: number }) =>
+ total + Number(gpu.total_vram),
0
)
}
+
setTotalNvidiaVram(totalNvidiaVram)
+
setAvailableVram(
- gpus.reduce(
- (total: number, gpu: { memoryFree: string }) =>
- total + Number(gpu.memoryFree),
- 0
- )
+ gpus.reduce((total, gpu) => {
+ return total + Number(gpu.free_vram || 0)
+ }, 0)
)
}, [
setUsedRam,
diff --git a/web/hooks/useGpuSetting.test.ts b/web/hooks/useGpuSetting.test.ts
deleted file mode 100644
index f52f07af8..000000000
--- a/web/hooks/useGpuSetting.test.ts
+++ /dev/null
@@ -1,87 +0,0 @@
-// useGpuSetting.test.ts
-
-import { renderHook, act } from '@testing-library/react'
-import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core'
-
-// Mock dependencies
-jest.mock('@/extension')
-
-import useGpuSetting from './useGpuSetting'
-import { extensionManager } from '@/extension'
-
-describe('useGpuSetting', () => {
- beforeEach(() => {
- jest.clearAllMocks()
- })
-
- it('should return GPU settings when available', async () => {
- const mockGpuSettings = {
- gpuCount: 2,
- gpuNames: ['NVIDIA GeForce RTX 3080', 'NVIDIA GeForce RTX 3070'],
- totalMemory: 20000,
- freeMemory: 15000,
- }
-
- const mockMonitoringExtension: Partial = {
- getGpuSetting: jest.fn().mockResolvedValue(mockGpuSettings),
- }
-
- jest
- .spyOn(extensionManager, 'get')
- .mockReturnValue(mockMonitoringExtension as MonitoringExtension)
-
- const { result } = renderHook(() => useGpuSetting())
-
- let gpuSettings
- await act(async () => {
- gpuSettings = await result.current.getGpuSettings()
- })
-
- expect(gpuSettings).toEqual(mockGpuSettings)
- expect(extensionManager.get).toHaveBeenCalledWith(
- ExtensionTypeEnum.SystemMonitoring
- )
- expect(mockMonitoringExtension.getGpuSetting).toHaveBeenCalled()
- })
-
- it('should return undefined when no GPU settings are found', async () => {
- const mockMonitoringExtension: Partial = {
- getGpuSetting: jest.fn().mockResolvedValue(undefined),
- }
-
- jest
- .spyOn(extensionManager, 'get')
- .mockReturnValue(mockMonitoringExtension as MonitoringExtension)
-
- const { result } = renderHook(() => useGpuSetting())
-
- let gpuSettings
- await act(async () => {
- gpuSettings = await result.current.getGpuSettings()
- })
-
- expect(gpuSettings).toBeUndefined()
- expect(extensionManager.get).toHaveBeenCalledWith(
- ExtensionTypeEnum.SystemMonitoring
- )
- expect(mockMonitoringExtension.getGpuSetting).toHaveBeenCalled()
- })
-
- it('should handle missing MonitoringExtension', async () => {
- jest.spyOn(extensionManager, 'get').mockReturnValue(undefined)
- jest.spyOn(console, 'debug').mockImplementation(() => {})
-
- const { result } = renderHook(() => useGpuSetting())
-
- let gpuSettings
- await act(async () => {
- gpuSettings = await result.current.getGpuSettings()
- })
-
- expect(gpuSettings).toBeUndefined()
- expect(extensionManager.get).toHaveBeenCalledWith(
- ExtensionTypeEnum.SystemMonitoring
- )
- expect(console.debug).toHaveBeenCalledWith('No GPU setting found')
- })
-})
diff --git a/web/hooks/useGpuSetting.ts b/web/hooks/useGpuSetting.ts
deleted file mode 100644
index 36f51ed57..000000000
--- a/web/hooks/useGpuSetting.ts
+++ /dev/null
@@ -1,21 +0,0 @@
-import { useCallback } from 'react'
-
-import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core'
-
-import { extensionManager } from '@/extension'
-
-export default function useGpuSetting() {
- const getGpuSettings = useCallback(async () => {
- const gpuSetting = await extensionManager
- ?.get(ExtensionTypeEnum.SystemMonitoring)
- ?.getGpuSetting()
-
- if (!gpuSetting) {
- console.debug('No GPU setting found')
- return undefined
- }
- return gpuSetting
- }, [])
-
- return { getGpuSettings }
-}
diff --git a/web/hooks/useHardwareManagement.ts b/web/hooks/useHardwareManagement.ts
new file mode 100644
index 000000000..d39b3c1fc
--- /dev/null
+++ b/web/hooks/useHardwareManagement.ts
@@ -0,0 +1,99 @@
+import { useMemo } from 'react'
+
+import { ExtensionTypeEnum, HardwareManagementExtension } from '@janhq/core'
+
+import { useSetAtom } from 'jotai'
+import useSWR from 'swr'
+
+import { extensionManager } from '@/extension/ExtensionManager'
+import {
+ cpuUsageAtom,
+ ramUtilitizedAtom,
+ totalRamAtom,
+ usedRamAtom,
+} from '@/helpers/atoms/SystemBar.atom'
+
+// Generic SWR fetcher that forwards a method call to the hardware extension
+async function fetchExtensionData<T>(
+ extension: HardwareManagementExtension | null,
+ method: (extension: HardwareManagementExtension) => Promise<T>
+): Promise<T> {
+ if (!extension) {
+ throw new Error('Extension not found')
+ }
+ return method(extension)
+}
+
+const getExtension = () =>
+ extensionManager.get<HardwareManagementExtension>(
+ ExtensionTypeEnum.Hardware
+ ) ?? null
+
+/**
+ * @returns The latest hardware info, any fetch error, and an SWR mutate function.
+ */
+export function useGetHardwareInfo() {
+ const setCpuUsage = useSetAtom(cpuUsageAtom)
+ const setUsedRam = useSetAtom(usedRamAtom)
+ const setTotalRam = useSetAtom(totalRamAtom)
+ const setRamUtilitized = useSetAtom(ramUtilitizedAtom)
+
+ const extension = useMemo(
+ () =>
+ extensionManager.get<HardwareManagementExtension>(
+ ExtensionTypeEnum.Hardware
+ ) ?? null,
+ []
+ )
+
+ const {
+ data: hardware,
+ error,
+ mutate,
+ } = useSWR(
+ extension ? 'hardware' : null,
+ () => fetchExtensionData(extension, (ext) => ext.getHardware()),
+ {
+ revalidateOnFocus: false,
+ revalidateOnReconnect: false,
+ refreshInterval: 2000,
+ }
+ )
+
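+ // Mirror the polled values into the shared atoms so the system bar stays in sync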
+ const usedMemory =
+ Number(hardware?.ram.total) - Number(hardware?.ram.available)
+
+ if (hardware?.ram?.total && hardware?.ram?.available)
+ setUsedRam(Number(usedMemory))
+
+ if (hardware?.ram?.total) setTotalRam(hardware.ram.total)
+
+ const ramUtilitized =
+ ((Number(usedMemory) || 0) / (hardware?.ram.total ?? 1)) * 100
+
+ setRamUtilitized(Math.round(ramUtilitized))
+
+ setCpuUsage(Math.round(hardware?.cpu.usage ?? 0))
+
+ return { hardware, error, mutate }
+}
+
+/**
+ * Activates the given set of GPUs.
+ * @returns A Promise that resolves once the active GPU list has been updated.
+ */
+export const setActiveGpus = async (data: { gpus: number[] }) => {
+ const extension = getExtension()
+
+ if (!extension) {
+ throw new Error('Extension is not available')
+ }
+
+ try {
+ const response = await extension.setAvtiveGpu(data)
+ return response
+ } catch (error) {
+ console.error('Failed to set active GPUs:', error)
+ throw error
+ }
+}
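+
+// Usage sketch (assumes the machine exposes GPU ids 0 and 1):
+//   await setActiveGpus({ gpus: [0, 1] })
+// The settings screen reloads afterwards so engines restart on the new device set.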
diff --git a/web/hooks/useSendChatMessage.ts b/web/hooks/useSendChatMessage.ts
index d3c8ff142..65124fcab 100644
--- a/web/hooks/useSendChatMessage.ts
+++ b/web/hooks/useSendChatMessage.ts
@@ -196,7 +196,10 @@ export default function useSendChatMessage() {
}
updateThread(updatedThread)
- if (!isResend) {
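+ // Persist the message only when it actually carries text content or attachments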
+ if (
+ !isResend &&
+ (newMessage.content.length || newMessage.attachments?.length)
+ ) {
// Add message
const createdMessage = await extensionManager
.get(ExtensionTypeEnum.Conversational)
diff --git a/web/hooks/useSettings.ts b/web/hooks/useSettings.ts
index 0f02d41af..a9635aa93 100644
--- a/web/hooks/useSettings.ts
+++ b/web/hooks/useSettings.ts
@@ -1,20 +1,10 @@
import { useCallback, useEffect, useState } from 'react'
-import { fs, joinPath } from '@janhq/core'
-
-type NvidiaDriver = {
- exist: boolean
- version: string
-}
+import { fs, GpuSettingInfo, joinPath } from '@janhq/core'
export type AppSettings = {
- run_mode: 'cpu' | 'gpu' | undefined
- notify: boolean
- gpus_in_use: string[]
vulkan: boolean
- gpus: string[]
- nvidia_driver: NvidiaDriver
- cuda: NvidiaDriver
+ gpus: GpuSettingInfo[]
}
export const useSettings = () => {
@@ -38,29 +28,16 @@ export const useSettings = () => {
return {}
}, [])
- const saveSettings = async ({
- runMode,
- notify,
- gpusInUse,
- vulkan,
- }: {
- runMode?: string | undefined
- notify?: boolean | undefined
- gpusInUse?: string[] | undefined
- vulkan?: boolean | undefined
- }) => {
+ const saveSettings = async ({ vulkan }: { vulkan?: boolean | undefined }) => {
const settingsFile = await joinPath(['file://settings', 'settings.json'])
const settings = await readSettings()
- if (runMode != null) settings.run_mode = runMode
- if (notify != null) settings.notify = notify
- if (gpusInUse != null) settings.gpus_in_use = gpusInUse.filter((e) => !!e)
if (vulkan != null) {
settings.vulkan = vulkan
// GPU enabled, set run_mode to 'gpu'
if (settings.vulkan === true) {
- settings.run_mode = 'gpu'
- } else {
- settings.run_mode = 'cpu'
+ settings?.gpus?.some((gpu: { activated: boolean }) =>
+ gpu.activated === true ? 'gpu' : 'cpu'
+ )
}
}
await fs.writeFileSync(settingsFile, JSON.stringify(settings))
diff --git a/web/package.json b/web/package.json
index 13d433b3a..63dde8c05 100644
--- a/web/package.json
+++ b/web/package.json
@@ -14,6 +14,7 @@
"test": "jest"
},
"dependencies": {
+ "@hello-pangea/dnd": "17.0.0",
"@hookform/resolvers": "^3.9.1",
"@janhq/core": "link:../core",
"@janhq/joi": "link:../joi",
@@ -29,7 +30,7 @@
"jotai": "^2.6.0",
"katex": "^0.16.10",
"lodash": "^4.17.21",
- "lucide-react": "^0.291.0",
+ "lucide-react": "^0.311.0",
"marked": "^9.1.2",
"next": "14.2.3",
"next-themes": "^0.2.1",
@@ -57,7 +58,7 @@
"slate-react": "0.110.3",
"swr": "^2.2.5",
"tailwind-merge": "^2.0.0",
- "tailwindcss": "3.3.5",
+ "tailwindcss": "3.4.17",
"ulidx": "^2.3.0",
"use-debounce": "^10.0.0",
"uuid": "^9.0.1",
diff --git a/web/public/images/ModelProvider/deepseek.svg b/web/public/images/ModelProvider/deepseek.svg
new file mode 100644
index 000000000..6f4b775d3
--- /dev/null
+++ b/web/public/images/ModelProvider/deepseek.svg
@@ -0,0 +1,25 @@
+
+
+
+
+Created with Pixso.
+
+
diff --git a/web/public/images/ModelProvider/google-gemini.svg b/web/public/images/ModelProvider/google-gemini.svg
new file mode 100644
index 000000000..787c83710
--- /dev/null
+++ b/web/public/images/ModelProvider/google-gemini.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/web/screens/Hub/ModelList/ModelHeader/index.tsx b/web/screens/Hub/ModelList/ModelHeader/index.tsx
index fea3dd3a6..6315ea1fc 100644
--- a/web/screens/Hub/ModelList/ModelHeader/index.tsx
+++ b/web/screens/Hub/ModelList/ModelHeader/index.tsx
@@ -55,7 +55,7 @@ const ModelItemHeader = ({ model, onSelectedModel }: Props) => {
// Default nvidia returns vram in MB, need to convert to bytes to match the unit of totalRamW
let ram = nvidiaTotalVram * 1024 * 1024
- if (ram === 0 || settings?.run_mode === 'cpu') {
+ if (ram === 0 || settings?.gpus?.some((gpu) => gpu.activated !== true)) {
ram = totalRam
}
const serverEnabled = useAtomValue(serverEnabledAtom)
diff --git a/web/screens/Hub/ModelPage/index.tsx b/web/screens/Hub/ModelPage/index.tsx
index dd551c96d..d46d18ca4 100644
--- a/web/screens/Hub/ModelPage/index.tsx
+++ b/web/screens/Hub/ModelPage/index.tsx
@@ -131,10 +131,10 @@ const ModelPage = ({ model, onGoBack }: Props) => {
{model.type !== 'cloud' && (
<>
-
+
Format
-
+
Size
>
diff --git a/web/screens/LocalServer/LocalServerRightPanel/index.tsx b/web/screens/LocalServer/LocalServerRightPanel/index.tsx
index 900a8128e..02e51868d 100644
--- a/web/screens/LocalServer/LocalServerRightPanel/index.tsx
+++ b/web/screens/LocalServer/LocalServerRightPanel/index.tsx
@@ -124,16 +124,20 @@ const LocalServerRightPanel = () => {
clipboard.copy(selectedModel?.id)
}}
suffixIcon={
- clipboard.copied ? (
-
+ selectedModel ? (
+ clipboard.copied ? (
+
+ ) : (
+
+ )
) : (
-
+ <></>
)
}
/>
diff --git a/web/screens/Settings/Advanced/FactoryReset/ModalConfirmReset.tsx b/web/screens/Settings/Advanced/FactoryReset/ModalConfirmReset.tsx
index c59163735..207a11ee6 100644
--- a/web/screens/Settings/Advanced/FactoryReset/ModalConfirmReset.tsx
+++ b/web/screens/Settings/Advanced/FactoryReset/ModalConfirmReset.tsx
@@ -30,7 +30,7 @@ const ModalConfirmReset = () => {
content={
- Restore appplication to its initial state, erasing all models and
+ Restore application to its initial state, erasing all models and
chat history. This action is irreversible and recommended only if
the application is corrupted.
diff --git a/web/screens/Settings/Advanced/FactoryReset/index.tsx b/web/screens/Settings/Advanced/FactoryReset/index.tsx
index f688e0a61..72642fafe 100644
--- a/web/screens/Settings/Advanced/FactoryReset/index.tsx
+++ b/web/screens/Settings/Advanced/FactoryReset/index.tsx
@@ -17,7 +17,7 @@ const FactoryReset = () => {
- Restore appplication to its initial state, erasing all models and chat
+ Restore application to its initial state, erasing all models and chat
history. This action is irreversible and recommended only if the
application is corrupted.
diff --git a/web/screens/Settings/Advanced/index.tsx b/web/screens/Settings/Advanced/index.tsx
index 66240c028..cb10a1778 100644
--- a/web/screens/Settings/Advanced/index.tsx
+++ b/web/screens/Settings/Advanced/index.tsx
@@ -1,32 +1,19 @@
'use client'
-import { useEffect, useState, ChangeEvent } from 'react'
+import { ChangeEvent } from 'react'
-import { openExternalUrl, AppConfiguration } from '@janhq/core'
+import { AppConfiguration } from '@janhq/core'
-import {
- ScrollArea,
- Switch,
- Input,
- Tooltip,
- Checkbox,
- useClickOutside,
- Button,
-} from '@janhq/joi'
+import { ScrollArea, Switch, Button } from '@janhq/joi'
import { useAtom, useAtomValue, useSetAtom } from 'jotai'
-import { ChevronDownIcon, ArrowRightIcon } from 'lucide-react'
-import { AlertTriangleIcon, AlertCircleIcon } from 'lucide-react'
-
-import { twMerge } from 'tailwind-merge'
+import { ArrowRightIcon } from 'lucide-react'
import { useDebouncedCallback } from 'use-debounce'
-import { snackbar, toaster } from '@/containers/Toast'
+import { toaster } from '@/containers/Toast'
-import { useActiveModel } from '@/hooks/useActiveModel'
import { useConfigurations } from '@/hooks/useConfigurations'
-import { useSettings } from '@/hooks/useSettings'
import ModalDeleteAllThreads from '@/screens/Thread/ThreadLeftPanel/ModalDeleteAllThreads'
@@ -36,7 +23,6 @@ import FactoryReset from './FactoryReset'
import {
experimentalFeatureEnabledAtom,
proxyEnabledAtom,
- vulkanEnabledAtom,
quickAskEnabledAtom,
} from '@/helpers/atoms/AppConfig.atom'
@@ -44,12 +30,6 @@ import { ThreadModalAction } from '@/helpers/atoms/Thread.atom'
import { modalActionThreadAtom } from '@/helpers/atoms/Thread.atom'
-type GPU = {
- id: string
- vram: number | null
- name: string
-}
-
/**
* Advanced Settings Screen
* @returns
@@ -58,31 +38,14 @@ const Advanced = ({ setSubdir }: { setSubdir: (subdir: string) => void }) => {
const [experimentalEnabled, setExperimentalEnabled] = useAtom(
experimentalFeatureEnabledAtom
)
- const [vulkanEnabled, setVulkanEnabled] = useAtom(vulkanEnabledAtom)
+
const [proxyEnabled, setProxyEnabled] = useAtom(proxyEnabledAtom)
const quickAskEnabled = useAtomValue(quickAskEnabledAtom)
- const [gpuEnabled, setGpuEnabled] = useState(false)
- const [gpuList, setGpuList] = useState([])
- const [gpusInUse, setGpusInUse] = useState([])
- const [dropdownOptions, setDropdownOptions] = useState(
- null
- )
const { configurePullOptions } = useConfigurations()
- const [toggle, setToggle] = useState(null)
-
- const { readSettings, saveSettings } = useSettings()
- const { stopModel } = useActiveModel()
- const [open, setOpen] = useState(false)
const setModalActionThread = useSetAtom(modalActionThreadAtom)
- const selectedGpu = gpuList
- .filter((x) => gpusInUse.includes(x.id))
- .map((y) => {
- return y['name']
- })
-
/**
* There could be a case where the state update is not synced
* so that retrieving state value from other hooks would not be accurate
@@ -110,24 +73,6 @@ const Advanced = ({ setSubdir }: { setSubdir: (subdir: string) => void }) => {
if (relaunch) window.core?.api?.relaunch()
}
- /**
- * Update Vulkan Enabled
- * @param e
- * @param relaunch
- * @returns void
- */
- const updateVulkanEnabled = async (e: boolean, relaunch: boolean = true) => {
- toaster({
- title: 'Reload',
- description: 'Vulkan settings updated. Reload now to apply the changes.',
- })
- stopModel()
- setVulkanEnabled(e)
- await saveSettings({ vulkan: e, gpusInUse: [] })
- // Relaunch to apply settings
- if (relaunch) window.location.reload()
- }
-
/**
* Update Experimental Enabled
* @param e
@@ -143,71 +88,11 @@ const Advanced = ({ setSubdir }: { setSubdir: (subdir: string) => void }) => {
if (e.target.checked) return
// It affects other settings, so we need to reset them
- const isRelaunch = quickAskEnabled || vulkanEnabled
+ const isRelaunch = quickAskEnabled
if (quickAskEnabled) await updateQuickAskEnabled(false, false)
- if (vulkanEnabled) await updateVulkanEnabled(false, false)
if (isRelaunch) window.core?.api?.relaunch()
}
- /**
- * useEffect to set GPU enabled if possible
- */
- useEffect(() => {
- const setUseGpuIfPossible = async () => {
- const settings = await readSettings()
- setGpuEnabled(settings.run_mode === 'gpu' && settings.gpus?.length > 0)
- setGpusInUse(settings.gpus_in_use || [])
- setVulkanEnabled(settings.vulkan || false)
- if (settings.gpus) {
- setGpuList(settings.gpus)
- }
- }
- setUseGpuIfPossible()
- }, [readSettings, setGpuList, setGpuEnabled, setGpusInUse, setVulkanEnabled])
-
- /**
- * Handle GPU Change
- * @param gpuId
- * @returns
- */
- const handleGPUChange = async (gpuId: string) => {
- let updatedGpusInUse = [...gpusInUse]
- if (updatedGpusInUse.includes(gpuId)) {
- updatedGpusInUse = updatedGpusInUse.filter((id) => id !== gpuId)
- if (
- gpuEnabled &&
- updatedGpusInUse.length === 0 &&
- gpuId &&
- gpuId.trim()
- ) {
- // Vulkan support only allow 1 active device at a time
- if (vulkanEnabled) {
- updatedGpusInUse = []
- }
- updatedGpusInUse.push(gpuId)
- }
- } else {
- // Vulkan support only allow 1 active device at a time
- if (vulkanEnabled) {
- updatedGpusInUse = []
- }
- if (gpuId && gpuId.trim()) updatedGpusInUse.push(gpuId)
- }
- setGpusInUse(updatedGpusInUse)
- await saveSettings({ gpusInUse: updatedGpusInUse.filter((e) => !!e) })
- // Reload window to apply changes
- // This will trigger engine servers to restart
- window.location.reload()
- }
-
- const gpuSelectionPlaceHolder =
- gpuList.length > 0 ? 'Select GPU' : "You don't have any compatible GPU"
-
- /**
- * Handle click outside
- */
- useClickOutside(() => setOpen(false), null, [dropdownOptions, toggle])
-
return (
@@ -231,201 +116,6 @@ const Advanced = ({ setSubdir }: { setSubdir: (subdir: string) => void }) => {
- {/* CPU / GPU switching */}
- {!isMac && (
-
-
-
-
-
GPU Acceleration
-
-
- Enable to enhance model performance by utilizing your GPU
- devices for acceleration. Read{' '}
-
- {' '}
-
- openExternalUrl(
- 'https://jan.ai/guides/troubleshooting/gpu-not-used/'
- )
- }
- >
- troubleshooting guide
- {' '}
- {' '}
- for further assistance.
-
-
-
-
- {gpuList.length > 0 && !gpuEnabled && (
-
- }
- content="Disabling NVIDIA GPU Acceleration may result in reduced
- performance. It is recommended to keep this enabled for
- optimal user experience."
- />
- )}
- {
- if (e.target.checked === true) {
- saveSettings({ runMode: 'gpu' })
- setGpuEnabled(true)
- snackbar({
- description:
- 'Successfully turned on GPU Acceleration',
- type: 'success',
- })
- } else {
- saveSettings({ runMode: 'cpu' })
- setGpuEnabled(false)
- snackbar({
- description:
- 'Successfully turned off GPU Acceleration',
- type: 'success',
- })
- }
- // Stop any running model to apply the changes
- if (e.target.checked !== gpuEnabled) {
- stopModel().finally(() => {
- setTimeout(() => {
- window.location.reload()
- }, 300)
- })
- }
- }}
- />
- }
- content="Your current device does not have a compatible GPU for
- monitoring. To enable GPU monitoring, please ensure your
- device has a supported Nvidia or AMD GPU with updated
- drivers."
- disabled={gpuList.length > 0}
- />
-
-
-
-
-
- Choose device(s)
-
-
-
- }
- onClick={() => setOpen(!open)}
- />
- {gpuList.length > 0 && (
-
-
-
- {vulkanEnabled ? 'Vulkan Supported GPUs' : 'Nvidia'}
-
-
-
- {gpuList
- .filter((gpu) =>
- vulkanEnabled
- ? gpu.name
- : gpu.name?.toLowerCase().includes('nvidia')
- )
- .map((gpu) => (
-
- handleGPUChange(gpu.id)}
- label={
-
- {gpu.name}
- {!vulkanEnabled && (
- {gpu.vram}MB VRAM
- )}
-
- }
- />
-
- ))}
-
- {gpuEnabled && gpusInUse.length > 1 && (
-
-
-
- If multi-GPU is enabled with different GPU models
- or without NVLink, it could impact token speed.
-
-
- )}
-
-
-
- )}
-
-
-
- )}
-
- {/* Vulkan for AMD GPU/ APU and Intel Arc GPU */}
- {!isMac && experimentalEnabled && (
-
-
-
-
Vulkan Support
-
-
- Enable Vulkan with AMD GPU/APU and Intel Arc GPU for better
- model performance (reload needed).
-
-
-
- updateVulkanEnabled(e.target.checked)}
- />
-
-
- )}
-
{/* Proxy Settings Link */}
@@ -436,7 +126,7 @@ const Advanced = ({ setSubdir }: { setSubdir: (subdir: string) => void }) => {
HTTPS Proxy
- Optional proxy server for internet connections
+ Optional proxy server for internet connections.
@@ -461,11 +151,11 @@ const Advanced = ({ setSubdir }: { setSubdir: (subdir: string) => void }) => {
Jan Quick Ask
- Enable Quick Ask to be triggered via the default hotkey .
+ Enable Quick Ask to be triggered via the default hotkey
{isMac ? '⌘' : 'Ctrl'} + J
{' '}
- (reload needed).
+ .
{
- {coreActiveExtensions.length > 0 && (
-
-
- Core Extension
-
-
- )}
{coreActiveExtensions
.filter((x) => x.name.includes(searchText.toLowerCase().trim()))
.sort((a, b) => a.name.localeCompare(b.name))
diff --git a/web/screens/Settings/Engines/DeleteEngineVariant.tsx b/web/screens/Settings/Engines/DeleteEngineVariant.tsx
index 1033164e6..d21dac3d8 100644
--- a/web/screens/Settings/Engines/DeleteEngineVariant.tsx
+++ b/web/screens/Settings/Engines/DeleteEngineVariant.tsx
@@ -25,7 +25,7 @@ const DeleteEngineVariant = ({
return (
Delete {variant.name}}
+ title={Delete Variant }
open={open}
onOpenChange={() => setOpen(!open)}
trigger={
@@ -39,7 +39,8 @@ const DeleteEngineVariant = ({
content={
- Are you sure you want to delete this variant?
+ Are you sure you want to delete {variant.name}? This action cannot
+ be undone.
{
-
onSwitchChange(engine)}
- />
+ {engine !== InferenceEngine.cortex_llamacpp && (
+ onSwitchChange(engine)}
+ />
+ )}
{
switch (PLATFORM) {
case 'win32':
@@ -57,7 +60,7 @@ const LocalEngineSettings = ({ engine }: { engine: InferenceEngine }) => {
const isEngineUpdated =
latestReleasedEngine &&
- latestReleasedEngine.every((item) =>
+ latestReleasedEngine.some((item) =>
item.name.includes(
defaultEngineVariant?.version.replace(/^v/, '') as string
)
@@ -86,8 +89,8 @@ const LocalEngineSettings = ({ engine }: { engine: InferenceEngine }) => {
(x: any) => x.version === defaultEngineVariant?.version
)
- const [selectedVariants, setSelectedVariants] = useState(
- defaultEngineVariant?.variant
+ const [selectedVariants, setSelectedVariants] = useAtom(
+ LocalEngineDefaultVariantAtom
)
const selectedVariant = useMemo(
@@ -102,7 +105,7 @@ const LocalEngineSettings = ({ engine }: { engine: InferenceEngine }) => {
if (defaultEngineVariant?.variant) {
setSelectedVariants(defaultEngineVariant.variant || '')
}
- }, [defaultEngineVariant])
+ }, [defaultEngineVariant, setSelectedVariants])
const handleEngineUpdate = useCallback(
async (event: { id: string; type: DownloadEvent; percent: number }) => {
@@ -262,92 +265,98 @@ const LocalEngineSettings = ({ engine }: { engine: InferenceEngine }) => {
{releasedEnginesByVersion &&
- releasedEnginesByVersion?.map((item, i) => {
- return (
-
-
-
-
-
- {item.name}
-
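+ // cuda-* runtime bundles are hidden from the manually selectable variant list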
+ releasedEnginesByVersion
+ ?.filter((item) => {
+ return !item.name.startsWith('cuda-')
+ })
+ .map((item, i) => {
+ return (
+
+
+
+
+
+ {item.name}
+
- {installedEngineByVersion?.some(
- (x) => x.name === item.name
- ) ? (
-
- ) : (
- <>
- {installingEngines.has(item.name) ? (
-
-
-
x.name === item.name
+ ) ? (
+
+ ) : (
+ <>
+ {installingEngines.has(item.name) ? (
+
+
+
+
+ {formatDownloadPercentage(
installingEngines.get(
item.name
- ) ?? 0,
- {
- hidePercentage: true,
- }
- ) as number
- }
- />
-
- {formatDownloadPercentage(
- installingEngines.get(item.name) ??
- 0
- )}
-
-
-
- ) : (
- {
- setInstallingEngines((prev) => {
- const updated = new Map(prev)
- updated.set(item.name, 0)
- return updated
- })
- installEngine(engine, {
- variant: item.name,
- version: String(
- defaultEngineVariant?.version
- ),
- }).then(() => {
- if (selectedVariants === '') {
- setSelectedVariants(item.name)
- }
- })
- }}
- >
- Download
-
- )}
- >
- )}
+ ) ?? 0
+ )}
+
+
+
+ ) : (
+
{
+ setInstallingEngines((prev) => {
+ const updated = new Map(prev)
+ updated.set(item.name, 0)
+ return updated
+ })
+ installEngine(engine, {
+ variant: item.name,
+ version: String(
+ defaultEngineVariant?.version
+ ),
+ }).then(() => {
+ if (selectedVariants === '') {
+ setSelectedVariants(item.name)
+ }
+ })
+ }}
+ >
+ Download
+
+ )}
+ >
+ )}
+
-
- )
- })}
+ )
+ })}
diff --git a/web/screens/Settings/Engines/ModalAddModel.tsx b/web/screens/Settings/Engines/ModalAddModel.tsx
index 40c986e92..1fbdabb6a 100644
--- a/web/screens/Settings/Engines/ModalAddModel.tsx
+++ b/web/screens/Settings/Engines/ModalAddModel.tsx
@@ -10,7 +10,7 @@ import { InferenceEngine, Model } from '@janhq/core'
import { Button, Input, Modal } from '@janhq/joi'
import { useAtomValue } from 'jotai'
-import { PlusIcon } from 'lucide-react'
+import { PlusIcon, ArrowUpRightFromSquare } from 'lucide-react'
import { z } from 'zod'
@@ -71,7 +71,7 @@ const ModelAddModel = ({ engine }: { engine: string }) => {
{prefix}
{label}
-
+
{desc}
{isRequired && * }
@@ -97,7 +97,7 @@ const ModelAddModel = ({ engine }: { engine: string }) => {
className="w-[500px]"
content={
diff --git a/web/screens/Settings/Engines/RemoteEngineSettings.tsx b/web/screens/Settings/Engines/RemoteEngineSettings.tsx
--- a/web/screens/Settings/Engines/RemoteEngineSettings.tsx
+++ b/web/screens/Settings/Engines/RemoteEngineSettings.tsx
@@ -321,7 +343,8 @@
Request Headers Template
- Template for request headers format.
+ HTTP headers template required for API authentication
+ and version specification.
@@ -351,8 +374,8 @@ const RemoteEngineSettings = ({
Request Format Conversion
- Function to convert Jan’s request format to this engine
- API’s format.
+ Template to transform OpenAI-compatible requests into
+ provider-specific format.
@@ -385,8 +408,8 @@ const RemoteEngineSettings = ({
Response Format Conversion
- Function to convert Jan’s request format to this engine
- API’s format.
+ Template to transform provider responses into
+ OpenAI-compatible format.
diff --git a/web/screens/Settings/Hardware/index.tsx b/web/screens/Settings/Hardware/index.tsx
new file mode 100644
index 000000000..dbc4354d1
--- /dev/null
+++ b/web/screens/Settings/Hardware/index.tsx
@@ -0,0 +1,390 @@
+/* eslint-disable @typescript-eslint/no-explicit-any */
+import * as React from 'react'
+
+import { useState } from 'react'
+
+import { DragDropContext, Draggable, Droppable } from '@hello-pangea/dnd'
+
+import { Progress, ScrollArea, Switch } from '@janhq/joi'
+import { useAtom, useAtomValue } from 'jotai'
+import { atomWithStorage } from 'jotai/utils'
+
+import { ChevronDownIcon, GripVerticalIcon } from 'lucide-react'
+
+import { twMerge } from 'tailwind-merge'
+
+import {
+ useGetHardwareInfo,
+ setActiveGpus,
+} from '@/hooks/useHardwareManagement'
+
+import { toGigabytes } from '@/utils/converter'
+
+import { utilizedMemory } from '@/utils/memory'
+
+import {
+ cpuUsageAtom,
+ ramUtilitizedAtom,
+ totalRamAtom,
+ usedRamAtom,
+ gpusAtom,
+} from '@/helpers/atoms/SystemBar.atom'
+
+const orderGpusAtom = atomWithStorage<string[]>('orderGpus', [], undefined, {
+ getOnInit: true,
+})
+
+const Hardware = () => {
+ const { hardware, mutate } = useGetHardwareInfo()
+ const [isActivatingGpu, setIsActivatingGpu] = useState<Set<string>>(new Set())
+
+ const [openPanels, setOpenPanels] = useState<Record<number, boolean>>({})
+
+ const cpuUsage = useAtomValue(cpuUsageAtom)
+ const totalRam = useAtomValue(totalRamAtom)
+ const usedRam = useAtomValue(usedRamAtom)
+ const ramUtilitized = useAtomValue(ramUtilitizedAtom)
+
+ const [gpus, setGpus] = useAtom(gpusAtom)
+
+ const [orderGpus, setOrderGpus] = useAtom(orderGpusAtom)
+
+ const togglePanel = (index: number) => {
+ setOpenPanels((prev) => ({
+ ...prev,
+ [index]: !prev[index], // Toggle the specific panel
+ }))
+ }
+
+ // Handle switch toggle for GPU activation
+ const handleSwitchChange = async (id: string, isActive: boolean) => {
+ setIsActivatingGpu((prev) => new Set(prev).add(id))
+
+ const updatedGpus = gpus.map((gpu) =>
+ gpu.id === id ? { ...gpu, activated: isActive } : gpu
+ )
+ // Call the API to update the active GPUs
+ try {
+ const activeGpuIds = updatedGpus
+ .filter((gpu: any) => gpu.activated)
+ .map((gpu: any) => Number(gpu.id))
+ await setActiveGpus({ gpus: activeGpuIds })
+ mutate()
+ window.location.reload()
+ } catch (error) {
+ console.error('Failed to update active GPUs:', error)
+ }
+ }
+
+ const handleDragEnd = (result: any) => {
+ if (!result.destination) return
+ const reorderedGpus = Array.from(gpus)
+ const [movedGpu] = reorderedGpus.splice(result.source.index, 1)
+ reorderedGpus.splice(result.destination.index, 0, movedGpu)
+
+ setGpus(reorderedGpus)
+ setOrderGpus(reorderedGpus.map((gpu) => gpu.id))
+ }
+
+ React.useEffect(() => {
+ if (hardware?.gpus) {
+ setGpus((prevGpus: any) => {
+ // Create a map of existing GPUs by UUID for quick lookup
+ const gpuMap = new Map(prevGpus.map((gpu: any) => [gpu.uuid, gpu]))
+
+ // Update existing GPUs or add new ones
+ const updatedGpus = hardware.gpus.map((newGpu) => {
+ const existingGpu: any = gpuMap.get(newGpu.uuid)
+
+ if (existingGpu) {
+ // Update the GPU properties while keeping the original order
+
+ if (existingGpu.activated !== newGpu.activated) {
+ setIsActivatingGpu((prev) => {
+ const updated = new Set(prev)
+ updated.delete(existingGpu.id)
+ updated.clear()
+ return updated
+ })
+ }
+
+ return {
+ ...existingGpu,
+ activated: newGpu.activated,
+ free_vram: newGpu.free_vram,
+ total_vram: newGpu.total_vram,
+ }
+ }
+
+ // Return the new GPU if not already in the state
+ return newGpu
+ })
+
+ // Append GPUs from the previous state that are not in the hardware.gpus
+ // This preserves user-reordered GPUs that aren't present in the new data
+ const remainingGpus = prevGpus.filter(
+ (prevGpu: any) =>
+ !hardware.gpus?.some((gpu) => gpu.uuid === prevGpu.uuid)
+ )
+
+ return [...updatedGpus, ...remainingGpus]
+ })
+ }
+ }, [hardware?.gpus, setGpus])
+
+ return (
+
+
+ {/* CPU */}
+
+
+
+
+
+ {hardware?.cpu.model}
+ |
+ Cores: {hardware?.cpu.cores}
+ |
+ Architecture: {hardware?.cpu.arch}
+
+
+
+
+
+ {/* RAM */}
+
+
+
+
+
+
+ {toGigabytes(usedRam, { hideUnit: true })}GB /{' '}
+ {toGigabytes(totalRam, { hideUnit: true })}GB
+
+ {hardware?.ram.type && (
+ <>
+ |
+ Type: {hardware?.ram.type}
+ >
+ )}
+
+
+
+
+
+ {/* OS */}
+
+
+
+
+
+ {hardware?.os.name}
+ |
+ {hardware?.os.version}
+
+
+
+
+ {/* GPUs */}
+ {!isMac && gpus.length > 0 && (
+
+
+
+
GPUs
+
+
+ {`Enhance model performance by utilizing your device's GPU for
+ acceleration.`}
+
+
+
+ {(provided) => (
+
+ {gpus
+ .sort((a, b) => {
+ const orderA = orderGpus.indexOf(a.id)
+ const orderB = orderGpus.indexOf(b.id)
+ return orderA - orderB
+ })
+ .map((item: any, i) => {
+ const gpuUtilization = utilizedMemory(
+ item.free_vram,
+ item.total_vram
+ )
+ const isLoading = isActivatingGpu.has(item.id)
+
+ return (
+
+ {(provided, snapshot) => (
+ 1 && 'last:rounded-t-none',
+ snapshot.isDragging
+ ? 'border-b'
+ : 'border-b-0 last:border-b'
+ )}
+ onClick={() => togglePanel(i)}
+ >
+
+
+
+
+ {item.activated && (
+
+
+
+ {gpuUtilization}%
+
+
+ )}
+
+
+ {item.activated && (
+
+ {(
+ (Number(item.total_vram) -
+ Number(item.free_vram)) /
+ 1024
+ ).toFixed(2)}
+ GB /{' '}
+
+ )}
+
+ {(
+ Number(item.total_vram) / 1024
+ ).toFixed(2)}
+ GB
+
+
+
+
+ handleSwitchChange(
+ item.id,
+ e.target.checked
+ )
+ }
+ />
+
+ {isLoading && (
+
+ )}
+
+
+
+
+
+
+ {openPanels[i] && (
+
+
+
+ Driver Version
+
+
+ {
+ item.additional_information
+ ?.driver_version
+ }
+
+
+
+
+ Compute Capability
+
+
+ {
+ item.additional_information
+ ?.compute_cap
+ }
+
+
+
+ )}
+
+ )}
+
+ )
+ })}
+ {provided.placeholder}
+
+ )}
+
+
+
+
+ )}
+
+
+ )
+}
+
+export default Hardware
diff --git a/web/screens/Settings/MyModels/MyModelList/index.tsx b/web/screens/Settings/MyModels/MyModelList/index.tsx
index 27dd5a34c..2159d2ebe 100644
--- a/web/screens/Settings/MyModels/MyModelList/index.tsx
+++ b/web/screens/Settings/MyModels/MyModelList/index.tsx
@@ -176,7 +176,7 @@ const MyModelList = ({ model }: Props) => {
onClick={() => {
setTimeout(async () => {
if (!serverEnabled) {
- await stopModel()
+ if (activeModel?.id === model.id) await stopModel()
deleteModel(model)
}
}, 500)
diff --git a/web/screens/Settings/SettingDetail/SettingDetailItem/SettingDetailTextInputItem/index.tsx b/web/screens/Settings/SettingDetail/SettingDetailItem/SettingDetailTextInputItem/index.tsx
index 3a1bad961..c3239083f 100644
--- a/web/screens/Settings/SettingDetail/SettingDetailItem/SettingDetailTextInputItem/index.tsx
+++ b/web/screens/Settings/SettingDetail/SettingDetailItem/SettingDetailTextInputItem/index.tsx
@@ -102,6 +102,7 @@ const SettingDetailTextInputItem = ({
@@ -118,11 +119,13 @@ type InputActionProps = {
onAction: (action: InputAction) => void
copied: boolean
obscure: boolean
+ value: string | string[]
}
const InputExtraActions: React.FC = ({
actions,
onAction,
+ value,
copied,
obscure,
}) => {
@@ -141,11 +144,15 @@ const InputExtraActions: React.FC = ({
className="text-green-600"
/>
) : (
- onAction('copy')}
- />
+ <>
+ {value.length > 0 && (
+ onAction('copy')}
+ />
+ )}
+ >
)
case 'unobscure':
diff --git a/web/screens/Settings/SettingDetail/index.tsx b/web/screens/Settings/SettingDetail/index.tsx
index 0d85ccbf4..8ceb600e6 100644
--- a/web/screens/Settings/SettingDetail/index.tsx
+++ b/web/screens/Settings/SettingDetail/index.tsx
@@ -13,6 +13,7 @@ import Engines from '@/screens/Settings/Engines'
import LocalEngineSettings from '@/screens/Settings/Engines/LocalEngineSettings'
import RemoteEngineSettings from '@/screens/Settings/Engines/RemoteEngineSettings'
import ExtensionSetting from '@/screens/Settings/ExtensionSetting'
+import Hardware from '@/screens/Settings/Hardware'
import Hotkeys from '@/screens/Settings/Hotkeys'
import MyModels from '@/screens/Settings/MyModels'
import Privacy from '@/screens/Settings/Privacy'
@@ -39,6 +40,9 @@ const SettingDetail = () => {
case 'Keyboard Shortcuts':
return
+ case 'Hardware':
+ return
+
case 'Privacy':
return
diff --git a/web/screens/Settings/index.tsx b/web/screens/Settings/index.tsx
index 66e11d07e..d126f0d0e 100644
--- a/web/screens/Settings/index.tsx
+++ b/web/screens/Settings/index.tsx
@@ -15,6 +15,7 @@ export const SettingScreenList = [
'My Models',
'Preferences',
'Keyboard Shortcuts',
+ 'Hardware',
'Privacy',
'Advanced Settings',
'Engines',
diff --git a/web/screens/Thread/ThreadCenterPanel/ChatBody/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatBody/index.tsx
index c47d19d67..99c79534f 100644
--- a/web/screens/Thread/ThreadCenterPanel/ChatBody/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/ChatBody/index.tsx
@@ -160,7 +160,7 @@ const ChatBody = memo(
>
{items.map((virtualRow) => (
@@ -170,6 +170,7 @@ const ChatBody = memo(
{
return (
+ {renderPreview(fileUpload)}
- {renderPreview(fileUpload)}
@@ -78,6 +79,7 @@ const ChatItem = forwardRef((message, ref) => {
{...message}
content={content}
status={status}
+ index={message.index}
isCurrentMessage={message.isCurrentMessage ?? false}
/>
diff --git a/web/screens/Thread/ThreadCenterPanel/FileUploadPreview/index.tsx b/web/screens/Thread/ThreadCenterPanel/FileUploadPreview/index.tsx
index b91f99f5c..ada4ac468 100644
--- a/web/screens/Thread/ThreadCenterPanel/FileUploadPreview/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/FileUploadPreview/index.tsx
@@ -22,7 +22,7 @@ const FileUploadPreview = () => {
return (
{!!fileUpload && (
-
+
@@ -35,10 +35,10 @@ const FileUploadPreview = () => {
-
+
)}
diff --git a/web/screens/Thread/ThreadCenterPanel/ImageUploadPreview/index.tsx b/web/screens/Thread/ThreadCenterPanel/ImageUploadPreview/index.tsx
index 7fa9e417a..63103f1ec 100644
--- a/web/screens/Thread/ThreadCenterPanel/ImageUploadPreview/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/ImageUploadPreview/index.tsx
@@ -35,13 +35,10 @@ const ImageUploadPreview: React.FC<Props> = ({ file }) => {
return (
-
+
-
- {file.name.replaceAll(/[-._]/g, ' ')}
-
diff --git a/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx b/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx
index 3a887e8ea..0e679388f 100644
--- a/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx
@@ -1,9 +1,7 @@
import { EngineManager, InferenceEngine } from '@janhq/core'
import { useAtomValue, useSetAtom } from 'jotai'
-import ModalTroubleShooting, {
- modalTroubleShootingAtom,
-} from '@/containers/ModalTroubleShoot'
+import ErrorMessage from '@/containers/ErrorMessage'
import { MainViewState } from '@/constants/screens'
@@ -14,68 +12,48 @@ import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom'
import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom'
const LoadModelError = () => {
- const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom)
const loadModelError = useAtomValue(loadModelErrorAtom)
const setMainState = useSetAtom(mainViewStateAtom)
const setSelectedSettingScreen = useSetAtom(selectedSettingAtom)
const activeAssistant = useAtomValue(activeAssistantAtom)
- const ErrorMessage = () => {
- if (
- typeof loadModelError?.includes === 'function' &&
- loadModelError.includes('EXTENSION_IS_NOT_INSTALLED')
- ) {
- return (
-
- Model is currently unavailable. Please switch to a different model or
- install the{' '}
- {
- setMainState(MainViewState.Settings)
- if (activeAssistant?.model.engine) {
- const engine = EngineManager.instance().get(
- InferenceEngine.cortex
- )
- engine?.name && setSelectedSettingScreen(engine.name)
- }
- }}
- >
- {loadModelError.split('::')[1] ?? ''}
- {' '}
- to continue using it.
-
- )
- } else {
- return (
-
- {loadModelError && (
-
{loadModelError}
- )}
-
- {`Something's wrong.`} Access
- setModalTroubleShooting(true)}
- >
- troubleshooting assistance
-
- now.
-
-
- )
- }
- }
-
return (
-
+
+ {typeof loadModelError?.includes === 'function' &&
+ loadModelError.includes('EXTENSION_IS_NOT_INSTALLED') ? (
+ <>
+
+ Model is currently unavailable. Please switch to a different
+ model or install the{' '}
+ {
+ setMainState(MainViewState.Settings)
+ if (activeAssistant?.model.engine) {
+ const engine = EngineManager.instance().get(
+ InferenceEngine.cortex
+ )
+ engine?.name && setSelectedSettingScreen(engine.name)
+ }
+ }}
+ >
+ {loadModelError.split('::')[1] ?? ''}
+ {' '}
+ to continue using it.
+
+ >
+ ) : (
+ <>
+ {loadModelError && (
+ {loadModelError}
+ )}
+ >
+ )}
+
+ }
+ />
)
}
export default LoadModelError
diff --git a/web/screens/Thread/ThreadCenterPanel/TextMessage/DocMessage.tsx b/web/screens/Thread/ThreadCenterPanel/TextMessage/DocMessage.tsx
index d5eec978f..ab5a4ce19 100644
--- a/web/screens/Thread/ThreadCenterPanel/TextMessage/DocMessage.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/TextMessage/DocMessage.tsx
@@ -16,7 +16,7 @@ const DocMessage = ({
const { onViewFile } = usePath()
return (
-
+
onViewFile(`${id}.pdf`)}
diff --git a/web/screens/Thread/ThreadCenterPanel/TextMessage/RelativeImage.tsx b/web/screens/Thread/ThreadCenterPanel/TextMessage/RelativeImage.tsx
index bfd13b0ef..65e43e6c4 100644
--- a/web/screens/Thread/ThreadCenterPanel/TextMessage/RelativeImage.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/TextMessage/RelativeImage.tsx
@@ -22,7 +22,7 @@ export const RelativeImage = ({
className={onClick ? 'cursor-pointer' : 'cursor-default'}
>
diff --git a/web/screens/Thread/ThreadCenterPanel/TextMessage/ThinkingBlock.tsx b/web/screens/Thread/ThreadCenterPanel/TextMessage/ThinkingBlock.tsx
new file mode 100644
index 000000000..0c9ded123
--- /dev/null
+++ b/web/screens/Thread/ThreadCenterPanel/TextMessage/ThinkingBlock.tsx
@@ -0,0 +1,63 @@
+import React from 'react'
+
+import { atom, useAtom } from 'jotai'
+import { ChevronDown, ChevronUp, Loader } from 'lucide-react'
+
+import { MarkdownTextMessage } from './MarkdownTextMessage'
+
+interface Props {
+ text: string
+ status: string
+ id: number
+}
+
+const thinkingBlockStateAtom = atom<{ [id: number]: boolean }>({})
+
+const ThinkingBlock = ({ id, text, status }: Props) => {
+ const [thinkingState, setThinkingState] = useAtom(thinkingBlockStateAtom)
+
+ const isExpanded = thinkingState[id] ?? false
+
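+ // Still "thinking" while the closing </think> tag has not streamed in yet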
+ const loading = !text.includes('</think>') && status === 'pending'
+
+ const handleClick = () => {
+ setThinkingState((prev) => ({ ...prev, [id]: !isExpanded }))
+ }
+
+ if (!text.replace(/<\/?think>/g, '').trim()) return null
+
+ return (
+
+
+
+ {loading && (
+
+ )}
+
+ {isExpanded ? (
+
+ ) : (
+
+ )}
+
+ {loading ? 'Thinking...' : 'Thought'}
+
+
+
+
+ {isExpanded && (
+
+ text={text.replace(/<\/?think>/g, '').trim()}
+ />
+
+ )}
+
+
+ )
+}
+
+export default ThinkingBlock
diff --git a/web/screens/Thread/ThreadCenterPanel/TextMessage/index.tsx b/web/screens/Thread/ThreadCenterPanel/TextMessage/index.tsx
index 0f881c8d9..ab86a0142 100644
--- a/web/screens/Thread/ThreadCenterPanel/TextMessage/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/TextMessage/index.tsx
@@ -16,6 +16,7 @@ import MessageToolbar from '../MessageToolbar'
import DocMessage from './DocMessage'
import ImageMessage from './ImageMessage'
import { MarkdownTextMessage } from './MarkdownTextMessage'
+import ThinkingBlock from './ThinkingBlock'
import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom'
import {
@@ -26,7 +27,7 @@ import {
import { chatWidthAtom } from '@/helpers/atoms/Setting.atom'
const MessageContainer: React.FC<
- ThreadMessage & { isCurrentMessage: boolean }
+ ThreadMessage & { isCurrentMessage: boolean; index: number }
> = (props) => {
const isUser = props.role === ChatCompletionRole.User
const isSystem = props.role === ChatCompletionRole.System
@@ -41,6 +42,21 @@ const MessageContainer: React.FC<
[props.content]
)
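+ // Split streamed text into the <think> reasoning segment and the visible answer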
+ const { reasoningSegment, textSegment } = useMemo(() => {
+ const isThinking = text.includes('<think>') && !text.includes('</think>')
+ if (isThinking) return { reasoningSegment: text, textSegment: '' }
+
+ const match = text.match(/<think>([\s\S]*?)<\/think>/)
+ if (match?.index === undefined)
+ return { reasoningSegment: undefined, textSegment: text }
+
+ const splitIndex = match.index + match[0].length
+ return {
+ reasoningSegment: text.slice(0, splitIndex),
+ textSegment: text.slice(splitIndex),
+ }
+ }, [text])
+
const image = useMemo(
() =>
props.content.find((e) => e.type === ContentType.Image)?.image_url?.url,
@@ -144,7 +160,14 @@ const MessageContainer: React.FC<
)}
dir="ltr"
>
-
+ {reasoningSegment && (
+
+ )}
+
)}
>
diff --git a/web/screens/Thread/ThreadRightPanel/index.tsx b/web/screens/Thread/ThreadRightPanel/index.tsx
index ba801fd0b..e363c397f 100644
--- a/web/screens/Thread/ThreadRightPanel/index.tsx
+++ b/web/screens/Thread/ThreadRightPanel/index.tsx
@@ -182,7 +182,6 @@ const ThreadRightPanel = () => {
if (!activeThread || !activeAssistant) return
setEngineParamsUpdate(true)
- resetModel()
updateModelParameter(activeThread, {
params: { [key]: value },
@@ -202,25 +201,9 @@ const ThreadRightPanel = () => {
},
})
}
- if (
- key === 'ctx_len' &&
- Number(value) < activeAssistant.model.parameters.max_tokens
- ) {
- updateModelParameter(activeThread, {
- params: {
- max_tokens: activeAssistant.model.settings.ctx_len,
- },
- })
- }
}
},
- [
- activeAssistant,
- activeThread,
- resetModel,
- setEngineParamsUpdate,
- updateModelParameter,
- ]
+ [activeAssistant, activeThread, setEngineParamsUpdate, updateModelParameter]
)
if (!activeThread) {
@@ -297,7 +280,10 @@ const ThreadRightPanel = () => {
{
+ resetModel()
+ onValueChanged(key, value)
+ }}
/>
)}
diff --git a/web/services/appService.test.ts b/web/services/appService.test.ts
index 5172ea6ed..52e8ed63b 100644
--- a/web/services/appService.test.ts
+++ b/web/services/appService.test.ts
@@ -1,23 +1,34 @@
import { extensionManager } from '@/extension'
import { appService } from './appService'
-test('should return correct system information when monitoring extension is found', async () => {
- const mockGpuSetting = { name: 'NVIDIA GeForce GTX 1080', memory: 8192 }
- const mockOsInfo = { platform: 'win32', release: '10.0.19041' }
- const mockMonitoringExtension = {
- getGpuSetting: jest.fn().mockResolvedValue(mockGpuSetting),
- getOsInfo: jest.fn().mockResolvedValue(mockOsInfo),
+test('should return correct system information when hardware extension is found', async () => {
+
+ (global as any).isMac = false;
+ (global as any).PLATFORM = "win32";
+
+ const mock = { cpu: { arch: 'arc' }, ram: { available: 4000, total: 8000 }, gpus: [{ name: 'NVIDIA GeForce GTX 1080', total_vram: 8192 }] }
+
+ const mockHardwareExtension = {
+ getHardware: jest.fn().mockResolvedValue(mock),
}
- extensionManager.get = jest.fn().mockReturnValue(mockMonitoringExtension)
+ extensionManager.get = jest.fn().mockReturnValue(mockHardwareExtension)
const result = await appService.systemInformation()
- expect(mockMonitoringExtension.getGpuSetting).toHaveBeenCalled()
- expect(mockMonitoringExtension.getOsInfo).toHaveBeenCalled()
- expect(result).toEqual({ gpuSetting: mockGpuSetting, osInfo: mockOsInfo })
+ expect(mockHardwareExtension.getHardware).toHaveBeenCalled()
+
+ expect(result).toEqual({
+ gpuSetting: {gpus: mock.gpus, vulkan: false, cpu: {arch: mock.cpu.arch},},
+ osInfo: {
+ platform: 'win32',
+ arch: mock.cpu.arch,
+ freeMem: mock.ram.available,
+ totalMem: mock.ram.total,
+ },
+ })
})
-test('should log a warning when monitoring extension is not found', async () => {
+test('should log a warning when hardware extension is not found', async () => {
const consoleWarnMock = jest
.spyOn(console, 'warn')
.mockImplementation(() => {})
@@ -26,7 +37,7 @@ test('should log a warning when monitoring extension is not found', async () =>
await appService.systemInformation()
expect(consoleWarnMock).toHaveBeenCalledWith(
- 'System monitoring extension not found'
+ 'Hardware extension not found'
)
consoleWarnMock.mockRestore()
})
diff --git a/web/services/appService.ts b/web/services/appService.ts
index 16060e2d9..ec53b89bf 100644
--- a/web/services/appService.ts
+++ b/web/services/appService.ts
@@ -1,29 +1,56 @@
import {
ExtensionTypeEnum,
- MonitoringExtension,
+ HardwareManagementExtension,
+ SupportedPlatform,
SystemInformation,
+ GpuSetting,
+ GpuSettingInfo,
} from '@janhq/core'
+import { getDefaultStore } from 'jotai'
+
import { toaster } from '@/containers/Toast'
import { extensionManager } from '@/extension'
+import { LocalEngineDefaultVariantAtom } from '@/helpers/atoms/App.atom'
+
export const appService = {
systemInformation: async (): Promise<SystemInformation | undefined> => {
- const monitorExtension = extensionManager?.get<MonitoringExtension>(
- ExtensionTypeEnum.SystemMonitoring
+ const selectedVariants = getDefaultStore().get(
+ LocalEngineDefaultVariantAtom
)
- if (!monitorExtension) {
- console.warn('System monitoring extension not found')
+
+ const hardwareExtension =
+ extensionManager?.get<HardwareManagementExtension>(
+ ExtensionTypeEnum.Hardware
+ )
+
+ if (!hardwareExtension) {
+ console.warn('Hardware extension not found')
return undefined
}
- const gpuSetting = await monitorExtension.getGpuSetting()
- const osInfo = await monitorExtension.getOsInfo()
+ const hardwareInfo = await hardwareExtension?.getHardware()
+
+ const gpuSettingInfo: GpuSetting | undefined = {
+ gpus: hardwareInfo.gpus.filter(
+ (gpu) => gpu.total_vram > 0
+ ) as GpuSettingInfo[],
+ vulkan: isMac ? false : selectedVariants.includes('vulkan'),
+ cpu: hardwareInfo.cpu,
+ }
+
+ const updateOsInfo = {
+ platform: PLATFORM as SupportedPlatform,
+ arch: hardwareInfo.cpu.arch,
+ freeMem: hardwareInfo.ram.available,
+ totalMem: hardwareInfo.ram.total,
+ }
return {
- gpuSetting,
- osInfo,
+ gpuSetting: gpuSettingInfo,
+ osInfo: updateOsInfo,
}
},
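
Piecing the new appService.systemInformation() together: it no longer queries a monitoring extension for GPU and OS info separately but derives both from a single getHardware() call. A sketch of that mapping under an assumed minimal payload shape (the real @janhq/core types may carry more fields):

    type HardwareSketch = {
      cpu: { arch: string }
      ram: { available: number; total: number }
      gpus: { name: string; total_vram: number }[]
    }

    const toSystemInformation = (
      hw: HardwareSketch,
      platform: string,
      vulkan: boolean
    ) => ({
      gpuSetting: {
        // keep only GPUs reporting dedicated VRAM
        gpus: hw.gpus.filter((gpu) => gpu.total_vram > 0),
        vulkan,
        cpu: hw.cpu,
      },
      osInfo: {
        platform,
        arch: hw.cpu.arch,
        freeMem: hw.ram.available,
        totalMem: hw.ram.total,
      },
    })
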
diff --git a/web/tailwind.config.js b/web/tailwind.config.js
index 2d9685c1a..361d686e6 100644
--- a/web/tailwind.config.js
+++ b/web/tailwind.config.js
@@ -18,6 +18,7 @@ module.exports = {
'slide-in': 'slide-in 1.2s cubic-bezier(.41,.73,.51,1.02)',
'leave': 'leave 150ms ease-in forwards',
'bounce-right': 'bounce-right 3s infinite',
+ 'spin': 'spin 2s linear infinite',
},
keyframes: {
'wave': {
@@ -47,12 +48,15 @@ module.exports = {
'40%': { transform: 'translateX(-8px)' },
'60%': { transform: 'translateX(-4px)' },
},
+ 'spin': {
+ '0%': { transform: 'rotate(0deg)' },
+ '100%': { transform: 'rotate(360deg)' },
+ },
},
extend: {
fontFamily: {
- fontFamily: {
- sans: ['Inter var', ...defaultTheme.fontFamily.sans],
- },
+ sans: ['Inter', ...defaultTheme.fontFamily.sans],
+ serif: ['Roboto Mono', ...defaultTheme.fontFamily.mono],
},
},
},
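
Note that Tailwind already ships a default spin animation (1s linear); the entries above override it with a slower 2s cycle. A usage sketch (the SVG markup is illustrative):

    import React from 'react'

    // The 'spin' key registered above is consumed as the `animate-spin` utility.
    export const Spinner = () => (
      <svg className="animate-spin h-4 w-4" viewBox="0 0 24 24" aria-hidden="true">
        <circle cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" fill="none" />
      </svg>
    )
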
diff --git a/web/utils/componentSettings.ts b/web/utils/componentSettings.ts
index 6e55d02e5..8ebcfd7c9 100644
--- a/web/utils/componentSettings.ts
+++ b/web/utils/componentSettings.ts
@@ -27,6 +27,10 @@ export const getConfigurationsData = (
componentSetting.controllerProps.max ||
4096
break
+ case 'temperature':
+ componentSetting.controllerProps.max =
+ selectedModel?.parameters?.max_temperature || 2
+ break
case 'ctx_len':
componentSetting.controllerProps.max =
selectedModel?.settings.ctx_len ||
diff --git a/web/utils/modelEngine.ts b/web/utils/modelEngine.ts
index 9ef17fd17..88479e27a 100644
--- a/web/utils/modelEngine.ts
+++ b/web/utils/modelEngine.ts
@@ -1,6 +1,6 @@
import { Engines, InferenceEngine } from '@janhq/core'
-export const getLogoEngine = (engine: InferenceEngine) => {
+export const getLogoEngine = (engine: InferenceEngine | string) => {
switch (engine) {
case InferenceEngine.nitro:
case InferenceEngine.cortex_llamacpp:
@@ -28,6 +28,11 @@ export const getLogoEngine = (engine: InferenceEngine) => {
return 'images/ModelProvider/cohere.svg'
case InferenceEngine.nvidia:
return 'images/ModelProvider/nvidia.svg'
+ // MARK: Deprecating InferenceEngine enum - as it's dynamically provided by the backend
+ case 'google_gemini':
+ return 'images/ModelProvider/google.svg'
+ case 'deepseek':
+ return 'images/ModelProvider/deepseek.svg'
default:
return undefined
}
@@ -44,7 +49,7 @@ export const isLocalEngine = (engines?: Engines, engine?: string) => {
return engines[engine as InferenceEngine]?.[0]?.type === 'local'
}
-export const getTitleByEngine = (engine: InferenceEngine) => {
+export const getTitleByEngine = (engine: InferenceEngine | string) => {
switch (engine) {
case InferenceEngine.nitro:
case InferenceEngine.cortex_llamacpp:
@@ -59,6 +64,8 @@ export const getTitleByEngine = (engine: InferenceEngine) => {
return 'OpenAI'
case InferenceEngine.openrouter:
return 'OpenRouter'
+ case 'google_gemini':
+ return 'Google'
default:
return engine.charAt(0).toUpperCase() + engine.slice(1)
}
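
With the signatures widened to `InferenceEngine | string`, engines reported dynamically by the backend resolve without adding new enum members; for example:

    import { getLogoEngine, getTitleByEngine } from '@/utils/modelEngine'

    getLogoEngine('google_gemini')    // 'images/ModelProvider/google.svg'
    getTitleByEngine('google_gemini') // 'Google'
    getTitleByEngine('deepseek')      // 'Deepseek' (default branch capitalizes)
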
diff --git a/web/utils/settings.ts b/web/utils/settings.ts
new file mode 100644
index 000000000..75a60bc52
--- /dev/null
+++ b/web/utils/settings.ts
@@ -0,0 +1,22 @@
+import { AppConfiguration } from '@janhq/core'
+
+/**
+ * Update app distinct Id
+ * @param id
+ */
+export const updateDistinctId = async (id: string) => {
+ const appConfiguration: AppConfiguration =
+ await window.core?.api?.getAppConfigurations()
+ appConfiguration.distinct_id = id
+ await window.core?.api?.updateAppConfiguration(appConfiguration)
+}
+
+/**
+ * Retrieve app distinct Id
+ * @returns the stored distinct Id, if any
+ */
+export const getAppDistinctId = async (): Promise<string | undefined> => {
+ const appConfiguration: AppConfiguration =
+ await window.core?.api?.getAppConfigurations()
+ return appConfiguration.distinct_id
+}