Merge pull request #4684 from janhq/release/v0.5.15

chore: merge release 0.5.15 branch into main branch
This commit is contained in:
Louis 2025-02-18 18:13:27 +07:00 committed by GitHub
commit 07428b4cdc
188 changed files with 4962 additions and 3365 deletions

View File

@ -6,6 +6,7 @@ on:
jobs: jobs:
assign-author: assign-author:
runs-on: ubuntu-latest runs-on: ubuntu-latest
if: ${{ github.event.pull_request.head.repo.full_name == github.repository }}
permissions: permissions:
pull-requests: write pull-requests: write
steps: steps:

View File

@ -9,31 +9,6 @@ jobs:
get-update-version: get-update-version:
uses: ./.github/workflows/template-get-update-version.yml uses: ./.github/workflows/template-get-update-version.yml
create-draft-release:
runs-on: ubuntu-latest
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
outputs:
upload_url: ${{ steps.create_release.outputs.upload_url }}
version: ${{ steps.get_version.outputs.version }}
permissions:
contents: write
steps:
- name: Extract tag name without v prefix
id: get_version
run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV && echo "::set-output name=version::${GITHUB_REF#refs/tags/v}"
env:
GITHUB_REF: ${{ github.ref }}
- name: Create Draft Release
id: create_release
uses: softprops/action-gh-release@v2
with:
tag_name: ${{ github.ref_name }}
token: ${{ secrets.GITHUB_TOKEN }}
name: "${{ env.VERSION }}"
draft: true
prerelease: false
generate_release_notes: true
build-macos: build-macos:
uses: ./.github/workflows/template-build-macos.yml uses: ./.github/workflows/template-build-macos.yml
secrets: inherit secrets: inherit
@ -65,7 +40,7 @@ jobs:
beta: true beta: true
sync-temp-to-latest: sync-temp-to-latest:
needs: [build-macos, create-draft-release, build-windows-x64, build-linux-x64] needs: [build-macos, build-windows-x64, build-linux-x64]
runs-on: ubuntu-latest runs-on: ubuntu-latest
permissions: permissions:
contents: write contents: write
@ -82,19 +57,15 @@ jobs:
AWS_DEFAULT_REGION: ${{ secrets.DELTA_AWS_REGION }} AWS_DEFAULT_REGION: ${{ secrets.DELTA_AWS_REGION }}
AWS_EC2_METADATA_DISABLED: "true" AWS_EC2_METADATA_DISABLED: "true"
- name: set release to prerelease
run: |
gh release edit v${{ needs.create-draft-release.outputs.version }} --draft=false --prerelease
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
noti-discord-and-update-url-readme: noti-discord-and-update-url-readme:
needs: [build-macos, create-draft-release, build-windows-x64, build-linux-x64, sync-temp-to-latest] needs: [build-macos, get-update-version, build-windows-x64, build-linux-x64, sync-temp-to-latest]
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Set version to environment variable - name: Set version to environment variable
run: | run: |
echo "VERSION=${{ needs.create-draft-release.outputs.version }}" >> $GITHUB_ENV VERSION=${{ needs.get-update-version.outputs.new_version }}
VERSION="${VERSION#v}"
echo "VERSION=$VERSION" >> $GITHUB_ENV
- name: Notify Discord - name: Notify Discord
uses: Ilshidur/action-discord@master uses: Ilshidur/action-discord@master
@ -105,6 +76,5 @@ jobs:
- macOS Universal: https://delta.jan.ai/beta/jan-beta-mac-universal-{{ VERSION }}.dmg - macOS Universal: https://delta.jan.ai/beta/jan-beta-mac-universal-{{ VERSION }}.dmg
- Linux Deb: https://delta.jan.ai/beta/jan-beta-linux-amd64-{{ VERSION }}.deb - Linux Deb: https://delta.jan.ai/beta/jan-beta-linux-amd64-{{ VERSION }}.deb
- Linux AppImage: https://delta.jan.ai/beta/jan-beta-linux-x86_64-{{ VERSION }}.AppImage - Linux AppImage: https://delta.jan.ai/beta/jan-beta-linux-x86_64-{{ VERSION }}.AppImage
- Github Release URL: https://github.com/janhq/jan/releases/tag/v{{ VERSION }}
env: env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_JAN_BETA }} DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_JAN_BETA }}

View File

@ -1,39 +0,0 @@
name: build-jan-server
on:
workflow_call:
inputs:
dockerfile_path:
required: false
type: string
default: './Dockerfile'
docker_image_tag:
required: true
type: string
default: 'ghcr.io/janhq/jan-server:dev-latest'
jobs:
build:
runs-on: ubuntu-latest
env:
REGISTRY: ghcr.io
IMAGE_NAME: janhq/jan-server
permissions:
packages: write
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Log in to the Container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push Docker image
uses: docker/build-push-action@v3
with:
context: .
file: ${{ inputs.dockerfile_path }}
push: true
tags: ${{ inputs.docker_image_tag }}

View File

@ -83,7 +83,7 @@ jobs:
cat ./electron/package.json cat ./electron/package.json
echo "------------------------" echo "------------------------"
cat ./package.json cat ./package.json
jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
mv /tmp/package.json electron/package.json mv /tmp/package.json electron/package.json
cat electron/package.json cat electron/package.json
@ -122,8 +122,6 @@ jobs:
make build-and-publish make build-and-publish
env: env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
ANALYTICS_ID: ${{ secrets.JAN_APP_UMAMI_PROJECT_API_KEY }}
ANALYTICS_HOST: ${{ secrets.JAN_APP_UMAMI_URL }}
POSTHOG_KEY: ${{ secrets.POSTHOG_KEY }} POSTHOG_KEY: ${{ secrets.POSTHOG_KEY }}
POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }} POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}

View File

@ -99,7 +99,7 @@ jobs:
cat ./electron/package.json cat ./electron/package.json
echo "------------------------" echo "------------------------"
cat ./package.json cat ./package.json
jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
mv /tmp/package.json electron/package.json mv /tmp/package.json electron/package.json
cat electron/package.json cat electron/package.json
@ -168,8 +168,6 @@ jobs:
APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }} APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
APP_PATH: '.' APP_PATH: '.'
DEVELOPER_ID: ${{ secrets.DEVELOPER_ID }} DEVELOPER_ID: ${{ secrets.DEVELOPER_ID }}
ANALYTICS_ID: ${{ secrets.JAN_APP_UMAMI_PROJECT_API_KEY }}
ANALYTICS_HOST: ${{ secrets.JAN_APP_UMAMI_URL }}
POSTHOG_KEY: ${{ secrets.POSTHOG_KEY }} POSTHOG_KEY: ${{ secrets.POSTHOG_KEY }}
POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }} POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}

View File

@ -108,7 +108,7 @@ jobs:
cat ./package.json cat ./package.json
echo "------------------------" echo "------------------------"
cat ./electron/scripts/uninstaller.nsh cat ./electron/scripts/uninstaller.nsh
jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
mv /tmp/package.json electron/package.json mv /tmp/package.json electron/package.json
cat electron/package.json cat electron/package.json
@ -160,8 +160,6 @@ jobs:
make build-and-publish make build-and-publish
env: env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
ANALYTICS_ID: ${{ secrets.JAN_APP_UMAMI_PROJECT_API_KEY }}
ANALYTICS_HOST: ${{ secrets.JAN_APP_UMAMI_URL }}
AZURE_KEY_VAULT_URI: ${{ secrets.AZURE_KEY_VAULT_URI }} AZURE_KEY_VAULT_URI: ${{ secrets.AZURE_KEY_VAULT_URI }}
AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }} AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}

View File

@ -12,6 +12,7 @@ export enum ExtensionTypeEnum {
SystemMonitoring = 'systemMonitoring', SystemMonitoring = 'systemMonitoring',
HuggingFace = 'huggingFace', HuggingFace = 'huggingFace',
Engine = 'engine', Engine = 'engine',
Hardware = 'hardware',
} }
export interface ExtensionType { export interface ExtensionType {

View File

@ -38,8 +38,14 @@ describe('OAIEngine', () => {
it('should subscribe to events on load', () => { it('should subscribe to events on load', () => {
engine.onLoad() engine.onLoad()
expect(events.on).toHaveBeenCalledWith(MessageEvent.OnMessageSent, expect.any(Function)) expect(events.on).toHaveBeenCalledWith(
expect(events.on).toHaveBeenCalledWith(InferenceEvent.OnInferenceStopped, expect.any(Function)) MessageEvent.OnMessageSent,
expect.any(Function)
)
expect(events.on).toHaveBeenCalledWith(
InferenceEvent.OnInferenceStopped,
expect.any(Function)
)
}) })
it('should handle inference request', async () => { it('should handle inference request', async () => {
@ -77,7 +83,12 @@ describe('OAIEngine', () => {
expect(events.emit).toHaveBeenCalledWith( expect(events.emit).toHaveBeenCalledWith(
MessageEvent.OnMessageUpdate, MessageEvent.OnMessageUpdate,
expect.objectContaining({ expect.objectContaining({
content: [{ type: ContentType.Text, text: { value: 'test response', annotations: [] } }], content: [
{
type: ContentType.Text,
text: { value: 'test response', annotations: [] },
},
],
status: MessageStatus.Ready, status: MessageStatus.Ready,
}) })
) )
@ -101,11 +112,10 @@ describe('OAIEngine', () => {
await engine.inference(data) await engine.inference(data)
expect(events.emit).toHaveBeenCalledWith( expect(events.emit).toHaveBeenLastCalledWith(
MessageEvent.OnMessageUpdate, MessageEvent.OnMessageUpdate,
expect.objectContaining({ expect.objectContaining({
content: [{ type: ContentType.Text, text: { value: 'test error', annotations: [] } }], status: 'error',
status: MessageStatus.Error,
error_code: 500, error_code: 500,
}) })
) )

View File

@ -42,7 +42,9 @@ export abstract class OAIEngine extends AIEngine {
*/ */
override onLoad() { override onLoad() {
super.onLoad() super.onLoad()
events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => this.inference(data)) events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.inference(data)
)
events.on(InferenceEvent.OnInferenceStopped, () => this.stopInference()) events.on(InferenceEvent.OnInferenceStopped, () => this.stopInference())
} }
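For context, these subscriptions are what other modules hit to drive the engine. Below is a hedged sketch of the emitting side; the `MessageRequest` payload fields are illustrative assumptions, and `events`, `MessageEvent`, `InferenceEvent`, and `MessageRequest` are assumed to be imported from the core package:

```typescript
// Illustrative only: a caller triggers inference by emitting the event that
// onLoad() subscribed to above; the payload fields here are assumptions.
events.emit(MessageEvent.OnMessageSent, {
  threadId: 'thread-1',
  messages: [{ role: 'user', content: 'Hello' }],
} as unknown as MessageRequest)

// Stopping a running generation goes through the second subscription.
events.emit(InferenceEvent.OnInferenceStopped, undefined)
```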
@ -128,7 +130,9 @@ export abstract class OAIEngine extends AIEngine {
events.emit(MessageEvent.OnMessageUpdate, message) events.emit(MessageEvent.OnMessageUpdate, message)
}, },
complete: async () => { complete: async () => {
message.status = message.content.length ? MessageStatus.Ready : MessageStatus.Error message.status = message.content.length
? MessageStatus.Ready
: MessageStatus.Error
events.emit(MessageEvent.OnMessageUpdate, message) events.emit(MessageEvent.OnMessageUpdate, message)
}, },
error: async (err: any) => { error: async (err: any) => {
@ -141,7 +145,10 @@ export abstract class OAIEngine extends AIEngine {
message.content[0] = { message.content[0] = {
type: ContentType.Text, type: ContentType.Text,
text: { text: {
value: err.message, value:
typeof message === 'string'
? err.message
: (JSON.stringify(err.message) ?? err.detail),
annotations: [], annotations: [],
}, },
} }

View File

@ -1,14 +1,17 @@
import { lastValueFrom, Observable } from 'rxjs' import { lastValueFrom, Observable } from 'rxjs'
import { requestInference } from './sse' import { requestInference } from './sse'
import { ReadableStream } from 'stream/web'; import { ReadableStream } from 'stream/web'
describe('requestInference', () => { describe('requestInference', () => {
it('should send a request to the inference server and return an Observable', () => { it('should send a request to the inference server and return an Observable', () => {
// Mock the fetch function // Mock the fetch function
const mockFetch: any = jest.fn(() => const mockFetch: any = jest.fn(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
json: () => Promise.resolve({ choices: [{ message: { content: 'Generated response' } }] }), json: () =>
Promise.resolve({
choices: [{ message: { content: 'Generated response' } }],
}),
headers: new Headers(), headers: new Headers(),
redirected: false, redirected: false,
status: 200, status: 200,
@ -36,7 +39,10 @@ describe('requestInference', () => {
const mockFetch: any = jest.fn(() => const mockFetch: any = jest.fn(() =>
Promise.resolve({ Promise.resolve({
ok: false, ok: false,
json: () => Promise.resolve({ error: { message: 'Wrong API Key', code: 'invalid_api_key' } }), json: () =>
Promise.resolve({
error: { message: 'Invalid API Key.', code: 'invalid_api_key' },
}),
headers: new Headers(), headers: new Headers(),
redirected: false, redirected: false,
status: 401, status: 401,
@ -56,16 +62,22 @@ describe('requestInference', () => {
// Assert the expected behavior // Assert the expected behavior
expect(result).toBeInstanceOf(Observable) expect(result).toBeInstanceOf(Observable)
expect(lastValueFrom(result)).rejects.toEqual({ message: 'Wrong API Key', code: 'invalid_api_key' }) expect(lastValueFrom(result)).rejects.toEqual({
message: 'Invalid API Key.',
code: 'invalid_api_key',
})
}) })
}) })
it('should handle a successful response with a transformResponse function', () => { it('should handle a successful response with a transformResponse function', () => {
// Mock the fetch function // Mock the fetch function
const mockFetch: any = jest.fn(() => const mockFetch: any = jest.fn(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
json: () => Promise.resolve({ choices: [{ message: { content: 'Generated response' } }] }), json: () =>
Promise.resolve({
choices: [{ message: { content: 'Generated response' } }],
}),
headers: new Headers(), headers: new Headers(),
redirected: false, redirected: false,
status: 200, status: 200,
@ -78,47 +90,57 @@ describe('requestInference', () => {
const inferenceUrl = 'https://inference-server.com' const inferenceUrl = 'https://inference-server.com'
const requestBody = { message: 'Hello' } const requestBody = { message: 'Hello' }
const model = { id: 'model-id', parameters: { stream: false } } const model = { id: 'model-id', parameters: { stream: false } }
const transformResponse = (data: any) => data.choices[0].message.content.toUpperCase() const transformResponse = (data: any) =>
data.choices[0].message.content.toUpperCase()
// Call the function // Call the function
const result = requestInference(inferenceUrl, requestBody, model, undefined, undefined, transformResponse) const result = requestInference(
inferenceUrl,
requestBody,
model,
undefined,
undefined,
transformResponse
)
// Assert the expected behavior // Assert the expected behavior
expect(result).toBeInstanceOf(Observable) expect(result).toBeInstanceOf(Observable)
expect(lastValueFrom(result)).resolves.toEqual('GENERATED RESPONSE') expect(lastValueFrom(result)).resolves.toEqual('GENERATED RESPONSE')
}) })
it('should handle a successful response with streaming enabled', () => {
it('should handle a successful response with streaming enabled', () => {
// Mock the fetch function // Mock the fetch function
const mockFetch: any = jest.fn(() => const mockFetch: any = jest.fn(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
body: new ReadableStream({ body: new ReadableStream({
start(controller) { start(controller) {
controller.enqueue(new TextEncoder().encode('data: {"choices": [{"delta": {"content": "Streamed"}}]}')); controller.enqueue(
controller.enqueue(new TextEncoder().encode('data: [DONE]')); new TextEncoder().encode(
controller.close(); 'data: {"choices": [{"delta": {"content": "Streamed"}}]}'
} )
)
controller.enqueue(new TextEncoder().encode('data: [DONE]'))
controller.close()
},
}), }),
headers: new Headers(), headers: new Headers(),
redirected: false, redirected: false,
status: 200, status: 200,
statusText: 'OK', statusText: 'OK',
}) })
); )
jest.spyOn(global, 'fetch').mockImplementation(mockFetch); jest.spyOn(global, 'fetch').mockImplementation(mockFetch)
// Define the test inputs // Define the test inputs
const inferenceUrl = 'https://inference-server.com'; const inferenceUrl = 'https://inference-server.com'
const requestBody = { message: 'Hello' }; const requestBody = { message: 'Hello' }
const model = { id: 'model-id', parameters: { stream: true } }; const model = { id: 'model-id', parameters: { stream: true } }
// Call the function // Call the function
const result = requestInference(inferenceUrl, requestBody, model); const result = requestInference(inferenceUrl, requestBody, model)
// Assert the expected behavior // Assert the expected behavior
expect(result).toBeInstanceOf(Observable); expect(result).toBeInstanceOf(Observable)
expect(lastValueFrom(result)).resolves.toEqual('Streamed'); expect(lastValueFrom(result)).resolves.toEqual('Streamed')
}); })

View File

@ -32,21 +32,20 @@ export function requestInference(
}) })
.then(async (response) => { .then(async (response) => {
if (!response.ok) { if (!response.ok) {
const data = await response.json() if (response.status === 401) {
let errorCode = ErrorCode.Unknown throw {
if (data.error) { code: ErrorCode.InvalidApiKey,
errorCode = data.error.code ?? data.error.type ?? ErrorCode.Unknown message: 'Invalid API Key.',
} else if (response.status === 401) {
errorCode = ErrorCode.InvalidApiKey
} }
const error = {
message: data.error?.message ?? data.message ?? 'Error occurred.',
code: errorCode,
} }
subscriber.error(error) let data = await response.json()
subscriber.complete() try {
handleError(data)
} catch (err) {
subscriber.error(err)
return return
} }
}
// There could be overriden stream parameter in the model // There could be overriden stream parameter in the model
// that is set in request body (transformed payload) // that is set in request body (transformed payload)
if ( if (
@ -54,9 +53,10 @@ export function requestInference(
model.parameters?.stream === false model.parameters?.stream === false
) { ) {
const data = await response.json() const data = await response.json()
if (data.error || data.message) { try {
subscriber.error(data.error ?? data) handleError(data)
subscriber.complete() } catch (err) {
subscriber.error(err)
return return
} }
if (transformResponse) { if (transformResponse) {
@ -91,13 +91,10 @@ export function requestInference(
const toParse = cachedLines + line const toParse = cachedLines + line
if (!line.includes('data: [DONE]')) { if (!line.includes('data: [DONE]')) {
const data = JSON.parse(toParse.replace('data: ', '')) const data = JSON.parse(toParse.replace('data: ', ''))
if ( try {
'error' in data || handleError(data)
'message' in data || } catch (err) {
'detail' in data subscriber.error(err)
) {
subscriber.error(data.error ?? data)
subscriber.complete()
return return
} }
content += data.choices[0]?.delta?.content ?? '' content += data.choices[0]?.delta?.content ?? ''
@ -118,3 +115,18 @@ export function requestInference(
.catch((err) => subscriber.error(err)) .catch((err) => subscriber.error(err))
}) })
} }
/**
* Handle error and normalize it to a common format.
* @param data
*/
const handleError = (data: any) => {
if (
data.error ||
data.message ||
data.detail ||
(Array.isArray(data) && data.length && data[0].error)
) {
throw data.error ?? data[0]?.error ?? data
}
}
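A quick sketch of how the new `handleError` helper normalizes the payload shapes it checks for. The sample payloads below are hypothetical, not fixtures from this diff, and `handleError` is assumed to be in scope within the same module:

```typescript
// Hypothetical payload shapes; only the structure matters for this illustration.
const openAIError = { error: { message: 'Invalid API Key.', code: 'invalid_api_key' } }
const batchedError = [{ error: { message: 'Bad request' } }]
const healthyChunk = { choices: [{ delta: { content: 'ok' } }] }

try {
  handleError(openAIError) // throws data.error: { message: 'Invalid API Key.', code: 'invalid_api_key' }
} catch (err) {
  console.error(err)
}

try {
  handleError(batchedError) // array branch: throws data[0].error
} catch (err) {
  console.error(err)
}

handleError(healthyChunk) // no error/message/detail field, so nothing is thrown and streaming continues
```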

View File

@ -5,6 +5,7 @@ import {
EngineReleased, EngineReleased,
EngineConfig, EngineConfig,
DefaultEngineVariant, DefaultEngineVariant,
Model,
} from '../../types' } from '../../types'
import { BaseExtension, ExtensionTypeEnum } from '../extension' import { BaseExtension, ExtensionTypeEnum } from '../extension'
@ -103,6 +104,11 @@ export abstract class EngineManagementExtension extends BaseExtension {
engineConfig?: EngineConfig engineConfig?: EngineConfig
): Promise<{ messages: string }> ): Promise<{ messages: string }>
/**
* Add a new remote model for a specific engine
*/
abstract addRemoteModel(model: Model): Promise<void>
/** /**
* @returns A Promise that resolves to an object of remote models list . * @returns A Promise that resolves to an object of remote models list .
*/ */

View File

@ -0,0 +1,26 @@
import { HardwareInformation } from '../../types'
import { BaseExtension, ExtensionTypeEnum } from '../extension'
/**
* Hardware management extension. Persists and retrieves hardware information.
* @abstract
* @extends BaseExtension
*/
export abstract class HardwareManagementExtension extends BaseExtension {
type(): ExtensionTypeEnum | undefined {
return ExtensionTypeEnum.Hardware
}
/**
* @returns A Promise that resolves to the system's hardware information.
*/
abstract getHardware(): Promise<HardwareInformation>
/**
* @returns A Promise that resolves to the activation result and the list of activated GPUs.
*/
abstract setAvtiveGpu(data: { gpus: number[] }): Promise<{
message: string
activated_gpus: number[]
}>
}
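A minimal sketch of what a concrete subclass might look like. The import path, base URL, and no-op lifecycle hooks are assumptions for illustration; only the abstract method signatures come from the class above.

```typescript
import { HardwareManagementExtension, HardwareInformation } from '@janhq/core'

// Hypothetical backend location; a placeholder, not a real Jan route.
const BASE_URL = 'http://127.0.0.1:39291/v1'

export default class JanHardwareExtension extends HardwareManagementExtension {
  onLoad(): void {}
  onUnload(): void {}

  async getHardware(): Promise<HardwareInformation> {
    const res = await fetch(`${BASE_URL}/hardware`)
    return res.json()
  }

  // Name intentionally matches the abstract declaration above so the override compiles.
  async setAvtiveGpu(data: { gpus: number[] }) {
    const res = await fetch(`${BASE_URL}/hardware/activate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(data),
    })
    return res.json() as Promise<{ message: string; activated_gpus: number[] }>
  }
}
```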

View File

@ -1,6 +1,5 @@
import { ConversationalExtension } from './index'; import { ConversationalExtension } from './index';
import { InferenceExtension } from './index'; import { InferenceExtension } from './index';
import { MonitoringExtension } from './index';
import { AssistantExtension } from './index'; import { AssistantExtension } from './index';
import { ModelExtension } from './index'; import { ModelExtension } from './index';
import * as Engines from './index'; import * as Engines from './index';
@ -14,10 +13,6 @@ describe('index.ts exports', () => {
expect(InferenceExtension).toBeDefined(); expect(InferenceExtension).toBeDefined();
}); });
test('should export MonitoringExtension', () => {
expect(MonitoringExtension).toBeDefined();
});
test('should export AssistantExtension', () => { test('should export AssistantExtension', () => {
expect(AssistantExtension).toBeDefined(); expect(AssistantExtension).toBeDefined();
}); });

View File

@ -9,10 +9,7 @@ export { ConversationalExtension } from './conversational'
*/ */
export { InferenceExtension } from './inference' export { InferenceExtension } from './inference'
/**
* Monitoring extension for system monitoring.
*/
export { MonitoringExtension } from './monitoring'
/** /**
* Assistant extension for managing assistants. * Assistant extension for managing assistants.
@ -33,3 +30,8 @@ export * from './engines'
* Engines Management * Engines Management
*/ */
export * from './enginesManagement' export * from './enginesManagement'
/**
* Hardware Management
*/
export * from './hardwareManagement'

View File

@ -1,42 +0,0 @@
import { ExtensionTypeEnum } from '../extension';
import { MonitoringExtension } from './monitoring';
it('should have the correct type', () => {
class TestMonitoringExtension extends MonitoringExtension {
getGpuSetting(): Promise<GpuSetting | undefined> {
throw new Error('Method not implemented.');
}
getResourcesInfo(): Promise<any> {
throw new Error('Method not implemented.');
}
getCurrentLoad(): Promise<any> {
throw new Error('Method not implemented.');
}
getOsInfo(): Promise<OperatingSystemInfo> {
throw new Error('Method not implemented.');
}
}
const monitoringExtension = new TestMonitoringExtension();
expect(monitoringExtension.type()).toBe(ExtensionTypeEnum.SystemMonitoring);
});
it('should create an instance of MonitoringExtension', () => {
class TestMonitoringExtension extends MonitoringExtension {
getGpuSetting(): Promise<GpuSetting | undefined> {
throw new Error('Method not implemented.');
}
getResourcesInfo(): Promise<any> {
throw new Error('Method not implemented.');
}
getCurrentLoad(): Promise<any> {
throw new Error('Method not implemented.');
}
getOsInfo(): Promise<OperatingSystemInfo> {
throw new Error('Method not implemented.');
}
}
const monitoringExtension = new TestMonitoringExtension();
expect(monitoringExtension).toBeInstanceOf(MonitoringExtension);
});

View File

@ -1,20 +0,0 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension'
import { GpuSetting, MonitoringInterface, OperatingSystemInfo } from '../../types'
/**
* Monitoring extension for system monitoring.
* @extends BaseExtension
*/
export abstract class MonitoringExtension extends BaseExtension implements MonitoringInterface {
/**
* Monitoring extension type.
*/
type(): ExtensionTypeEnum | undefined {
return ExtensionTypeEnum.SystemMonitoring
}
abstract getGpuSetting(): Promise<GpuSetting | undefined>
abstract getResourcesInfo(): Promise<any>
abstract getCurrentLoad(): Promise<any>
abstract getOsInfo(): Promise<OperatingSystemInfo>
}

View File

@ -1,4 +1,5 @@
export type AppConfiguration = { export type AppConfiguration = {
data_folder: string data_folder: string
quick_ask: boolean quick_ask: boolean
distinct_id?: string
} }

View File

@ -18,6 +18,7 @@ export type EngineMetadata = {
template?: string template?: string
} }
} }
explore_models_url?: string
} }
export type EngineVariant = { export type EngineVariant = {

View File

@ -0,0 +1,55 @@
export type Cpu = {
arch: string
cores: number
instructions: string[]
model: string
usage: number
}
export type GpuAdditionalInformation = {
compute_cap: string
driver_version: string
}
export type Gpu = {
activated: boolean
additional_information?: GpuAdditionalInformation
free_vram: number
id: string
name: string
total_vram: number
uuid: string
version: string
}
export type Os = {
name: string
version: string
}
export type Power = {
battery_life: number
charging_status: string
is_power_saving: boolean
}
export type Ram = {
available: number
total: number
type: string
}
export type Storage = {
available: number
total: number
type: string
}
export type HardwareInformation = {
cpu: Cpu
gpus: Gpu[]
os: Os
power: Power
ram: Ram
storage: Storage
}
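For reference, a hypothetical value shaped to these types (every number and string below is made up for illustration):

```typescript
const exampleHardware: HardwareInformation = {
  cpu: { arch: 'amd64', cores: 16, instructions: ['avx2', 'fma'], model: 'Example CPU', usage: 7 },
  gpus: [
    {
      activated: true,
      additional_information: { compute_cap: '8.6', driver_version: '550.00' },
      free_vram: 10240,
      id: '0',
      name: 'Example GPU',
      total_vram: 12288,
      uuid: 'GPU-00000000-0000-0000-0000-000000000000',
      version: '12.0',
    },
  ],
  os: { name: 'Linux', version: '6.8' },
  power: { battery_life: 100, charging_status: 'discharging', is_power_saving: false },
  ram: { available: 16384, total: 32768, type: 'DDR5' },
  storage: { available: 256000, total: 512000, type: 'SSD' },
}
```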

View File

@ -4,7 +4,6 @@ import * as model from './model';
import * as thread from './thread'; import * as thread from './thread';
import * as message from './message'; import * as message from './message';
import * as inference from './inference'; import * as inference from './inference';
import * as monitoring from './monitoring';
import * as file from './file'; import * as file from './file';
import * as config from './config'; import * as config from './config';
import * as huggingface from './huggingface'; import * as huggingface from './huggingface';
@ -18,7 +17,6 @@ import * as setting from './setting';
expect(thread).toBeDefined(); expect(thread).toBeDefined();
expect(message).toBeDefined(); expect(message).toBeDefined();
expect(inference).toBeDefined(); expect(inference).toBeDefined();
expect(monitoring).toBeDefined();
expect(file).toBeDefined(); expect(file).toBeDefined();
expect(config).toBeDefined(); expect(config).toBeDefined();
expect(huggingface).toBeDefined(); expect(huggingface).toBeDefined();

View File

@ -3,7 +3,6 @@ export * from './model'
export * from './thread' export * from './thread'
export * from './message' export * from './message'
export * from './inference' export * from './inference'
export * from './monitoring'
export * from './file' export * from './file'
export * from './config' export * from './config'
export * from './huggingface' export * from './huggingface'
@ -11,3 +10,4 @@ export * from './miscellaneous'
export * from './api' export * from './api'
export * from './setting' export * from './setting'
export * from './engine' export * from './engine'
export * from './hardware'

View File

@ -1,33 +1,25 @@
import { GpuAdditionalInformation } from '../hardware'
export type SystemResourceInfo = { export type SystemResourceInfo = {
memAvailable: number memAvailable: number
} }
export type RunMode = 'cpu' | 'gpu'
export type GpuSetting = { export type GpuSetting = {
notify: boolean
run_mode: RunMode
nvidia_driver: {
exist: boolean
version: string
}
cuda: {
exist: boolean
version: string
}
gpus: GpuSettingInfo[] gpus: GpuSettingInfo[]
gpu_highest_vram: string
gpus_in_use: string[]
is_initial: boolean
// TODO: This needs to be set based on user toggle in settings // TODO: This needs to be set based on user toggle in settings
vulkan: boolean vulkan: boolean
cpu?: any
} }
export type GpuSettingInfo = { export type GpuSettingInfo = {
activated: boolean
free_vram: number
id: string id: string
vram: string
name: string name: string
arch?: string total_vram: number
uuid: string
version: string
additional_information?: GpuAdditionalInformation
} }
export type SystemInformation = { export type SystemInformation = {
@ -42,9 +34,6 @@ export type SupportedPlatform = SupportedPlatformTuple[number]
export type OperatingSystemInfo = { export type OperatingSystemInfo = {
platform: SupportedPlatform | 'unknown' platform: SupportedPlatform | 'unknown'
arch: string arch: string
release: string
machine: string
version: string
totalMem: number totalMem: number
freeMem: number freeMem: number
} }

View File

@ -1,5 +1,3 @@
import { FileMetadata } from '../file'
/** /**
* Represents the information about a model. * Represents the information about a model.
* @stored * @stored
@ -70,6 +68,11 @@ export type Model = {
*/ */
id: string id: string
/**
* The model identifier, modern version of id.
*/
model?: string
/** /**
* Human-readable name that is used for UI. * Human-readable name that is used for UI.
*/ */
@ -147,6 +150,7 @@ export type ModelSettingParams = {
*/ */
export type ModelRuntimeParams = { export type ModelRuntimeParams = {
temperature?: number temperature?: number
max_temperature?: number
token_limit?: number token_limit?: number
top_k?: number top_k?: number
top_p?: number top_p?: number

View File

@ -1,13 +0,0 @@
import * as monitoringInterface from './monitoringInterface'
import * as resourceInfo from './resourceInfo'
import * as index from './index'
it('should re-export all symbols from monitoringInterface and resourceInfo', () => {
for (const key in monitoringInterface) {
expect(index[key]).toBe(monitoringInterface[key])
}
for (const key in resourceInfo) {
expect(index[key]).toBe(resourceInfo[key])
}
})

View File

@ -1,2 +0,0 @@
export * from './monitoringInterface'
export * from './resourceInfo'

View File

@ -1,29 +0,0 @@
import { GpuSetting, OperatingSystemInfo } from '../miscellaneous'
/**
* Monitoring extension for system monitoring.
* @extends BaseExtension
*/
export interface MonitoringInterface {
/**
* Returns information about the system resources.
* @returns {Promise<any>} A promise that resolves with the system resources information.
*/
getResourcesInfo(): Promise<any>
/**
* Returns the current system load.
* @returns {Promise<any>} A promise that resolves with the current system load.
*/
getCurrentLoad(): Promise<any>
/**
* Returns the GPU configuration.
*/
getGpuSetting(): Promise<GpuSetting | undefined>
/**
* Returns information about the operating system.
*/
getOsInfo(): Promise<OperatingSystemInfo>
}

View File

@ -1,6 +0,0 @@
export type ResourceInfo = {
mem: {
totalMemory: number
usedMemory: number
}
}

View File

@ -65,7 +65,7 @@ const DropdownDownload = ({ lastRelease }: Props) => {
const userAgent = navigator.userAgent const userAgent = navigator.userAgent
if (userAgent.includes('Windows')) { if (userAgent.includes('Windows')) {
// windows user // windows user
setDefaultSystem(systems[2]) setDefaultSystem(systems[1])
} else if (userAgent.includes('Linux')) { } else if (userAgent.includes('Linux')) {
// linux user // linux user
setDefaultSystem(systems[3]) setDefaultSystem(systems[3])

View File

@ -23,6 +23,4 @@ Adhering to Jan's privacy preserving philosophy, our analytics philosophy is to
## What is tracked ## What is tracked
1. By default, Github tracks downloads and device metadata for all public GitHub repositories. This helps us troubleshoot & ensure cross-platform support. 1. By default, Github tracks downloads and device metadata for all public GitHub repositories. This helps us troubleshoot & ensure cross-platform support.
2. We use [Umami](https://umami.is/) to collect, analyze, and understand application data while maintaining visitor privacy and data ownership. We are using the Umami Cloud in Europe to ensure GDPR compliance. Please see [Umami Privacy Policy](https://umami.is/privacy) for more details. 2. Additionally, we plan to enable a `Settings` feature for users to turn off all tracking.
3. We use Umami to track a single `app.opened` event without additional user metadata, in order to understand retention. In addition, we track `app.version` to understand app version usage.
4. Additionally, we plan to enable a `Settings` feature for users to turn off all tracking.

View File

@ -14,6 +14,11 @@
"desktop": "Installation", "desktop": "Installation",
"data-folder": "Jan Data Folder", "data-folder": "Jan Data Folder",
"privacy": "Privacy", "privacy": "Privacy",
"privacy-policy": {
"type": "page",
"display": "hidden",
"title": "Privacy Policy"
},
"user-guides": { "user-guides": {
"title": "BASIC USAGE", "title": "BASIC USAGE",
"type": "separator" "type": "separator"

View File

@ -55,7 +55,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Conversational", "productName": "Conversational",
"version": "1.0.0", "version": "1.0.0",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables conversations and state persistence via your filesystem", "description": "This extension enables conversations and state persistence via your filesystem.",
"url": "extension://@janhq/conversational-extension/dist/index.js" "url": "extension://@janhq/conversational-extension/dist/index.js"
}, },
"@janhq/inference-anthropic-extension": { "@janhq/inference-anthropic-extension": {
@ -70,7 +70,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Anthropic Inference Engine", "productName": "Anthropic Inference Engine",
"version": "1.0.2", "version": "1.0.2",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Anthropic chat completion API calls", "description": "This extension enables Anthropic chat completion API calls.",
"url": "extension://@janhq/inference-anthropic-extension/dist/index.js" "url": "extension://@janhq/inference-anthropic-extension/dist/index.js"
}, },
"@janhq/inference-triton-trt-llm-extension": { "@janhq/inference-triton-trt-llm-extension": {
@ -85,7 +85,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Triton-TRT-LLM Inference Engine", "productName": "Triton-TRT-LLM Inference Engine",
"version": "1.0.0", "version": "1.0.0",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option", "description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option.",
"url": "extension://@janhq/inference-triton-trt-llm-extension/dist/index.js" "url": "extension://@janhq/inference-triton-trt-llm-extension/dist/index.js"
}, },
"@janhq/inference-mistral-extension": { "@janhq/inference-mistral-extension": {
@ -100,7 +100,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "MistralAI Inference Engine", "productName": "MistralAI Inference Engine",
"version": "1.0.1", "version": "1.0.1",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Mistral chat completion API calls", "description": "This extension enables Mistral chat completion API calls.",
"url": "extension://@janhq/inference-mistral-extension/dist/index.js" "url": "extension://@janhq/inference-mistral-extension/dist/index.js"
}, },
"@janhq/inference-martian-extension": { "@janhq/inference-martian-extension": {
@ -115,7 +115,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Martian Inference Engine", "productName": "Martian Inference Engine",
"version": "1.0.1", "version": "1.0.1",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Martian chat completion API calls", "description": "This extension enables Martian chat completion API calls.",
"url": "extension://@janhq/inference-martian-extension/dist/index.js" "url": "extension://@janhq/inference-martian-extension/dist/index.js"
}, },
"@janhq/inference-openrouter-extension": { "@janhq/inference-openrouter-extension": {
@ -130,7 +130,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "OpenRouter Inference Engine", "productName": "OpenRouter Inference Engine",
"version": "1.0.0", "version": "1.0.0",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Open Router chat completion API calls", "description": "This extension enables Open Router chat completion API calls.",
"url": "extension://@janhq/inference-openrouter-extension/dist/index.js" "url": "extension://@janhq/inference-openrouter-extension/dist/index.js"
}, },
"@janhq/inference-nvidia-extension": { "@janhq/inference-nvidia-extension": {
@ -145,7 +145,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "NVIDIA NIM Inference Engine", "productName": "NVIDIA NIM Inference Engine",
"version": "1.0.1", "version": "1.0.1",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables NVIDIA chat completion API calls", "description": "This extension enables NVIDIA chat completion API calls.",
"url": "extension://@janhq/inference-nvidia-extension/dist/index.js" "url": "extension://@janhq/inference-nvidia-extension/dist/index.js"
}, },
"@janhq/inference-groq-extension": { "@janhq/inference-groq-extension": {
@ -160,7 +160,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Groq Inference Engine", "productName": "Groq Inference Engine",
"version": "1.0.1", "version": "1.0.1",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables fast Groq chat completion API calls", "description": "This extension enables fast Groq chat completion API calls.",
"url": "extension://@janhq/inference-groq-extension/dist/index.js" "url": "extension://@janhq/inference-groq-extension/dist/index.js"
}, },
"@janhq/inference-openai-extension": { "@janhq/inference-openai-extension": {
@ -175,7 +175,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "OpenAI Inference Engine", "productName": "OpenAI Inference Engine",
"version": "1.0.2", "version": "1.0.2",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables OpenAI chat completion API calls", "description": "This extension enables OpenAI chat completion API calls.",
"url": "extension://@janhq/inference-openai-extension/dist/index.js" "url": "extension://@janhq/inference-openai-extension/dist/index.js"
}, },
"@janhq/inference-cohere-extension": { "@janhq/inference-cohere-extension": {
@ -190,7 +190,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Cohere Inference Engine", "productName": "Cohere Inference Engine",
"version": "1.0.0", "version": "1.0.0",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Cohere chat completion API calls", "description": "This extension enables Cohere chat completion API calls.",
"url": "extension://@janhq/inference-cohere-extension/dist/index.js" "url": "extension://@janhq/inference-cohere-extension/dist/index.js"
}, },
"@janhq/model-extension": { "@janhq/model-extension": {
@ -205,7 +205,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Model Management", "productName": "Model Management",
"version": "1.0.33", "version": "1.0.33",
"main": "dist/index.js", "main": "dist/index.js",
"description": "Model Management Extension provides model exploration and seamless downloads", "description": "Model Management Extension provides model exploration and seamless downloads.",
"url": "extension://@janhq/model-extension/dist/index.js" "url": "extension://@janhq/model-extension/dist/index.js"
}, },
"@janhq/monitoring-extension": { "@janhq/monitoring-extension": {
@ -220,7 +220,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "System Monitoring", "productName": "System Monitoring",
"version": "1.0.10", "version": "1.0.10",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension provides system health and OS level data", "description": "This extension provides system health and OS level data.",
"url": "extension://@janhq/monitoring-extension/dist/index.js" "url": "extension://@janhq/monitoring-extension/dist/index.js"
}, },
"@janhq/assistant-extension": { "@janhq/assistant-extension": {
@ -235,7 +235,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Jan Assistant", "productName": "Jan Assistant",
"version": "1.0.1", "version": "1.0.1",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models", "description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models.",
"url": "extension://@janhq/assistant-extension/dist/index.js" "url": "extension://@janhq/assistant-extension/dist/index.js"
}, },
"@janhq/tensorrt-llm-extension": { "@janhq/tensorrt-llm-extension": {

View File

@ -41,7 +41,7 @@ Ensure that your system meets the following requirements to use Jan effectively:
### Mac Performance Guide ### Mac Performance Guide
<Callout type="info"> <Callout type="info">
**Apple Silicon Macs** leverage Metal for GPU acceleration, providing faster performance than **Appple Intel Macs**, which rely solely on CPU processing. **Apple Silicon Macs** leverage Metal for GPU acceleration, providing faster performance than **Apple Intel Macs**, which rely solely on CPU processing.
</Callout> </Callout>
**Apple Silicon (M1, M2, M3)** **Apple Silicon (M1, M2, M3)**
- Metal acceleration enabled by default, no configuration required - Metal acceleration enabled by default, no configuration required

View File

@ -47,8 +47,8 @@ To add a new remote engine:
|-------|-------------|----------| |-------|-------------|----------|
| Engine Name | Name for your engine (e.g., "OpenAI", "Claude") | ✓ | | Engine Name | Name for your engine (e.g., "OpenAI", "Claude") | ✓ |
| API URL | The base URL of the provider's API | ✓ | | API URL | The base URL of the provider's API | ✓ |
| API Key | Your authentication key from the provider | ✓ | | API Key | Your authentication key to activate this engine | ✓ |
| Model List URL | URL for fetching available models | | | Model List URL | The endpoint URL to fetch available models | |
| API Key Template | Custom authorization header format | | | API Key Template | Custom authorization header format | |
| Request Format Conversion | Function to convert Jan's request format to provider's format | | | Request Format Conversion | Function to convert Jan's request format to provider's format | |
| Response Format Conversion | Function to convert provider's response format to Jan's format | | | Response Format Conversion | Function to convert provider's response format to Jan's format | |

View File

@ -0,0 +1,125 @@
---
title: Jan Privacy Policy
description: Jan's data collection practices, privacy measures, and your rights. Learn how we protect your data and maintain transparency.
---
# Privacy Policy
<div className="text-sm text-gray-500 mt-2 mb-8">
Last Updated: January 16, 2025
</div>
## Introduction
We are committed to protecting your privacy and ensuring you have control over your data. This Privacy Policy outlines what information Menlo Research Pte Ltd (the "Company") collects from users of the Jan desktop app and website (the "Services"), how the Company uses that information, and the measures the Company takes to safeguard that information.
## 1. Data Collection and Consent
### Explicit Consent
The Company does not collect any data until you explicitly allow tracking.
### Tracking Preferences
Upon first launching the Jan desktop app or visiting the website, you will be prompted to set your tracking preferences. These preferences can be modified at any time via the app's Settings menu or the website's Privacy Settings.
### Legal Basis
Pursuant to the European Union's General Data Protection Regulation (EU) 2016/679 (the "GDPR"), the Company processes data based on your explicit consent (GDPR Article 6(1)(a)). This means:
- The Company only processes your data after receiving clear, affirmative consent from you.
- You may withdraw your consent at any time through the app's Settings menu or the website's Privacy Settings.
- If you withdraw your consent, the Company will stop optional data collection from the effective date of withdrawal.
- Your withdrawal of consent will not affect the lawfulness of processing before its withdrawal.
## 2. Data We Do Not Collect
Regardless of your analytics permissions, the Company does not collect the following:
- Chat History: Your conversations with the Jan app are private and inaccessible to the Company.
- Chat Settings: Your personalized settings remain solely with you.
- Language Models: The specific language models you use are not tracked.
## 3. Uses of Information
To build a reliable and user-friendly product offering, understanding how the Jan app is used is essential. If you permit tracking, the Company collects product analytics data to:
- Improve User Experience: Enhance app functionality based on usage patterns; and
- Measure Engagement: Assess active users and retention rates to ensure ongoing value.
## 4. Product Analytics
### Data Collected
When you opt-in to tracking, we collect the following anonymous data:
- Active Users: Number of daily active users to gauge engagement.
- Retention Rates: Track if users continue to find value in the Jan app over time.
### Data Anonymity
- User ID: Analytics data is tied to a randomly generated user ID, ensuring no link to your personal identity.
- Privacy Assurance: Your chat history and personal data are not tracked or linked to your usage data.
## 5. What We Do Not Track
Even with analytics permissions granted, the Company does not track the following:
- Conversations: Your interactions with the Jan app remain private.
- Files: The Company does not scan, upload, or view your files.
- Personal Identity: The Company does not collect personally identifiable information about users.
- Prompts: Your prompts and prompt templates are not monitored.
- Conversation Metrics: The Company does not track context length or conversation length.
- Model Usage: The specific models you use or their types are not tracked.
- Storage: You retain full control over storing your files and logs, and your privacy is prioritized.
## 6. Using Cloud Models
The Jan app allows you to connect to cloud-based model APIs (e.g. GPT, Claude models).
- Data Handling: The API provider processes your messages directly; the Jan app does not access or store these messages.
- Local Models: Choosing local models ensures all data remains on your device, with no external access.
## 7. Data Storage and Processing
### Analytics Provider
The Company uses PostHog EU for analytics, which ensures all data is processed within the European Union.
### Data Security
- Encryption: All data transfers are encrypted using Transport Layer Security (TLS) to ensure secure transmission.
- Storage: PostHog securely manages the data the Company collects. For more information, please refer to PostHog's GDPR documentation.
## 8. Data Retention
- Retention Period: The Company retains analytics data for up to 12 months unless otherwise required to comply with any applicable legal requirements.
- Deletion Requests: If you wish to request the deletion of your analytics data, you may do so by sending a written request to hello@jan.ai.
## 9. Your Rights and Choices
- Access and Control: You may access, modify, or delete your tracking preferences at any time through the Jan app or website settings.
- Data Requests: If you have any requests related to your data, please address them to hello@jan.ai.
## 10. Children's Privacy
Our Services are not targeted at children under the age of 13. The Company does not knowingly collect data from children under the age of 13. If the Company becomes aware that data of persons under the age of 13 has been collected without verifiable parental consent, the Company will take appropriate actions to delete this information.
## 11. Changes to the Privacy Policy
The Company reserves the right, at its sole discretion, to update this Privacy Policy at any time to reflect changes in the practices or legal requirements of the Company. The Company will use reasonable efforts to notify you of any significant changes via app notifications, the website, or email. Your continued use of the Services following such updates means you accept those changes.
## 12. Cookies and Tracking Technologies
Our website utilizes cookies to:
- Enhance user experience; and
- Measure website traffic and usage patterns.
Most browsers allow you to remove or manage cookie functions and adjust your privacy and security preferences.
For more details, please refer to our Cookie Policy.
## 13. Contact Us
For any questions or concerns about this Privacy Policy or our data practices, please contact hello@jan.ai.

View File

@ -1,5 +1,5 @@
--- ---
title: Jan Privacy title: Jan's Privacy Approach
description: Jan is an app that allows you to own your AI. We prioritize your control over your data and explain what data we collect and why. description: Jan is an app that allows you to own your AI. We prioritize your control over your data and explain what data we collect and why.
keywords: keywords:
[ [
@ -19,45 +19,57 @@ keywords:
] ]
--- ---
---
import { Callout } from 'nextra/components'
# Privacy # Privacy
Jan is an app that allows you to own your AI. We prioritize your control over your data and explain what data we collect and why. Jan is an app that allows you to own your AI. We prioritize local AI models and your control over your data. This page explains what data we collect and why. No tricks.
- Jan can't see your chats with AI <Callout>
- You're free to opt out For a comprehensive overview of our privacy practices, you can read our full [Privacy Policy](/docs/privacy-policy).
</Callout>
<Callout type="info">
We don't collect any data until you explicitly allow tracking.
</Callout>
You'll be asked about your tracking preferences when you first launch the app, and you can change them at any time in Settings.
Regardless of your analytics permissions, Jan will **never** access your chat history, chat settings, or the language models you have used.
## Why and what we track ## Why and what we track
To build a reliable, user-friendly AI that you own, we need to understand how Jan is used. We collect two types of data: performance data and usage data. To build a reliable, user-friendly AI that you own, we need to understand how Jan is used. If users allow tracking, we collect product analytics data.
### Performance data ### Product Analytics
We track app crashes and collect technical details about what went wrong, along with basic information about the hardware you're using.
When Jan crashes, we collect technical details about what went wrong.
- Specific AI model in use during the crash
- Hardware: `CPU`, `GPU`, `RAM`
- Logs: `Date/Time`, `OS & version`, `app version`, `error codes & messages`.
### Usage data
We track data like how often the app is opened to check: We track data like how often the app is opened to check:
- **Active Users**: How many people use Jan daily to measure engagement - **Active Users**: How many people use Jan daily to measure engagement
- **Retention Rates**: To understand if users are finding value in Jan over time - **Retention Rates**: To understand if users are finding value in Jan over time
Usage data is tied to a randomly generated telemetry ID. None of our usage data can be linked to your personal identity. Product analytics data is tied to a randomly generated user ID. None of our usage data can be linked to your personal identity. Your chat history and personal data are never tracked.
## What we **dont** track: ## What we **don't** track
- Your conversations with Jan. Those stay on your device.
- Your files. We dont scan, upload, or even look at them. <Callout type="info">
- Anything tied to your identity. Even if you grant analytics permissions, Jan doesn't track many of your private activities.
</Callout>
- We don't track your conversations with Jan.
- We don't scan, upload, or look at your files.
- We don't collect anything tied to your identity.
- We don't track your prompts and prompt templates.
- We don't monitor context length or conversation length.
- We don't track the models you have used or their types.
You keep your own files and logs on your device; storing them yourself keeps your privacy in your hands.
## Using Cloud Models ## Using Cloud Models
Jan allows you to connect cloud model APIs. If you choose to use cloud-based models (e.g. GPT, Claude models), the API provider handling the model will have access to your messages as part of processing the request. Again, Jan doesn't see or store these messages - they go directly to the provider. Remember: With local models, everything stays on your device, so no one - not even us- can see your messages. Jan allows you to connect cloud model APIs. If you choose to use cloud-based models (e.g. GPT, Claude models), the API provider handling the model will have access to your messages as part of processing the request. Again, Jan doesn't see or store these messages - they go directly to the provider. Remember: With local models, everything stays on your device, so no one - not even us- can see your messages.
## Where we store & process data ## Where we store & process data
We use [PostHog](https://posthog.com/eu) EU for analytics, ensuring all data is processed within the European Union. This setup complies with GDPR and other strict privacy regulations. PostHog lets us self-host and securely manage the data we collect. Read more [on PostHog's GDPR doc](https://posthog.com/docs/privacy/gdpr-compliance). We use [PostHog](https://posthog.com/eu) EU for analytics, ensuring all data is processed within the European Union. This setup complies with GDPR and other strict privacy regulations. PostHog lets us securely manage the data we collect. Read more [on PostHog's GDPR doc](https://posthog.com/docs/privacy/gdpr-compliance).
For a detailed breakdown of the analytics data we collect, you can check out our analytics repo. If you have any questions or concerns, feel free to reach out to us at hi@jan.ai.

View File

@ -1,6 +1,6 @@
--- ---
title: "Jan: Open source ChatGPT-alternative that runs 100% offline" title: "Jan: Open source ChatGPT-alternative that runs 100% offline"
description: "Chat with AI without privact concerns. Jan is an open-source alternative to ChatGPT, running AI models locally on your device." description: "Chat with AI without privacy concerns. Jan is an open-source alternative to ChatGPT, running AI models locally on your device."
keywords: keywords:
[ [
Jan, Jan,

View File

@ -1,47 +0,0 @@
---
title: Raycast
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
raycast integration,
Raycast,
]
description: A step-by-step guide on integrating Jan with Raycast.
---
import { Steps } from 'nextra/components'
# Raycast
## Integrate Raycast with Jan
[Raycast](https://www.raycast.com/) is a productivity tool designed for macOS that enhances workflow efficiency by providing quick access to various tasks and functionalities through a keyboard-driven interface. To integrate Raycast with Jan, follow the steps below:
<Steps>
### Step 1: Download the TinyLlama Model
1. Open Jan app.
2. Go to the **Hub** and download the TinyLlama model.
3. The model will be available at `~jan/models/tinyllama-1.1b`.
### Step 2: Clone and Run the Program
1. Clone this [GitHub repository](https://github.com/InNoobWeTrust/nitro-raycast).
2. Execute the project using the following command:
```bash title="Node.js"
npm i && npm run dev
```
### Step 3: Search for Nitro and Run the Model
Search for `Nitro` using the program, and you can use the models from Jan in RayCast.
</Steps>

View File

@ -24,13 +24,6 @@ Jan runs with privacy by default and is used 100% offline on your own computer.
If you use a Remote AI API (e.g., OpenAI API, Groq API), your data will naturally travel to their servers. They will be subject to the privacy policy of the respective API provider. If you use a Remote AI API (e.g., OpenAI API, Groq API), your data will naturally travel to their servers. They will be subject to the privacy policy of the respective API provider.
</Callout> </Callout>
Jan uses [Umami](https://umami.is/) for analytics, which is a privacy-focused, GDPR-compliant analytics tool that does not track personal information. We use this to get aggregate reports on OS and hardware types and prioritize our engineering roadmap. As per [Umami's Privacy Policy](https://umami.is/privacy), Umami uses the following data points to generate its reports:
- OS and device characteristics
- IP address
Jan does not get any of this data, and we do not track IP addresses or other identifying information. We are actively looking into more privacy-respecting ways to handle analytics, crash reports, and telemetry and would love to work with the community on this.
### Cortex ### Cortex
Cortex is a library that runs large language models (LLMs) locally on your computer. Cortex does not collect any personal information. Cortex is a library that runs large language models (LLMs) locally on your computer. Cortex does not collect any personal information.

View File

@ -28,6 +28,7 @@ import { setupReactDevTool } from './utils/dev'
import { trayManager } from './managers/tray' import { trayManager } from './managers/tray'
import { logSystemInfo } from './utils/system' import { logSystemInfo } from './utils/system'
import { registerGlobalShortcuts } from './utils/shortcut' import { registerGlobalShortcuts } from './utils/shortcut'
import { registerLogger } from './utils/logger'
const preloadPath = join(__dirname, 'preload.js') const preloadPath = join(__dirname, 'preload.js')
const rendererPath = join(__dirname, '..', 'renderer') const rendererPath = join(__dirname, '..', 'renderer')
@ -79,6 +80,7 @@ app
}) })
.then(setupCore) .then(setupCore)
.then(createUserSpace) .then(createUserSpace)
.then(registerLogger)
.then(migrate) .then(migrate)
.then(setupExtensions) .then(setupExtensions)
.then(setupMenu) .then(setupMenu)

View File

@ -1,6 +1,6 @@
{ {
"name": "jan", "name": "jan",
"version": "0.1.4", "version": "0.1.1737985524",
"main": "./build/main.js", "main": "./build/main.js",
"author": "Jan <service@jan.ai>", "author": "Jan <service@jan.ai>",
"license": "MIT", "license": "MIT",

View File

@ -1,16 +1,28 @@
import fs from 'fs' import {
createWriteStream,
existsSync,
mkdirSync,
readdir,
stat,
unlink,
writeFileSync,
} from 'fs'
import util from 'util' import util from 'util'
import { import {
getAppConfigurations, getAppConfigurations,
getJanDataFolderPath, getJanDataFolderPath,
Logger, Logger,
LoggerManager,
} from '@janhq/core/node' } from '@janhq/core/node'
import path, { join } from 'path' import path, { join } from 'path'
export class FileLogger extends Logger { /**
* File Logger
*/
export class FileLogger implements Logger {
name = 'file' name = 'file'
logCleaningInterval: number = 120000 logCleaningInterval: number = 120000
timeout: NodeJS.Timeout | null = null timeout: NodeJS.Timeout | undefined
appLogPath: string = './' appLogPath: string = './'
logEnabled: boolean = true logEnabled: boolean = true
@ -18,14 +30,13 @@ export class FileLogger extends Logger {
logEnabled: boolean = true, logEnabled: boolean = true,
logCleaningInterval: number = 120000 logCleaningInterval: number = 120000
) { ) {
super()
this.logEnabled = logEnabled this.logEnabled = logEnabled
if (logCleaningInterval) this.logCleaningInterval = logCleaningInterval if (logCleaningInterval) this.logCleaningInterval = logCleaningInterval
const appConfigurations = getAppConfigurations() const appConfigurations = getAppConfigurations()
const logFolderPath = join(appConfigurations.data_folder, 'logs') const logFolderPath = join(appConfigurations.data_folder, 'logs')
if (!fs.existsSync(logFolderPath)) { if (!existsSync(logFolderPath)) {
fs.mkdirSync(logFolderPath, { recursive: true }) mkdirSync(logFolderPath, { recursive: true })
} }
this.appLogPath = join(logFolderPath, 'app.log') this.appLogPath = join(logFolderPath, 'app.log')
@ -69,8 +80,8 @@ export class FileLogger extends Logger {
const logDirectory = path.join(getJanDataFolderPath(), 'logs') const logDirectory = path.join(getJanDataFolderPath(), 'logs')
// Perform log cleaning // Perform log cleaning
const currentDate = new Date() const currentDate = new Date()
if (fs.existsSync(logDirectory)) if (existsSync(logDirectory))
fs.readdir(logDirectory, (err, files) => { readdir(logDirectory, (err, files) => {
if (err) { if (err) {
console.error('Error reading log directory:', err) console.error('Error reading log directory:', err)
return return
@ -78,7 +89,7 @@ export class FileLogger extends Logger {
files.forEach((file) => { files.forEach((file) => {
const filePath = path.join(logDirectory, file) const filePath = path.join(logDirectory, file)
fs.stat(filePath, (err, stats) => { stat(filePath, (err, stats) => {
if (err) { if (err) {
console.error('Error getting file stats:', err) console.error('Error getting file stats:', err)
return return
@ -86,7 +97,7 @@ export class FileLogger extends Logger {
// Check size // Check size
if (stats.size > size) { if (stats.size > size) {
fs.unlink(filePath, (err) => { unlink(filePath, (err) => {
if (err) { if (err) {
console.error('Error deleting log file:', err) console.error('Error deleting log file:', err)
return return
@ -103,7 +114,7 @@ export class FileLogger extends Logger {
(1000 * 3600 * 24) (1000 * 3600 * 24)
) )
if (daysDifference > days) { if (daysDifference > days) {
fs.unlink(filePath, (err) => { unlink(filePath, (err) => {
if (err) { if (err) {
console.error('Error deleting log file:', err) console.error('Error deleting log file:', err)
return return
@ -124,15 +135,20 @@ export class FileLogger extends Logger {
} }
} }
/**
* Write log function implementation
* @param message
* @param logPath
*/
const writeLog = (message: string, logPath: string) => { const writeLog = (message: string, logPath: string) => {
if (!fs.existsSync(logPath)) { if (!existsSync(logPath)) {
const logDirectory = path.join(getJanDataFolderPath(), 'logs') const logDirectory = path.join(getJanDataFolderPath(), 'logs')
if (!fs.existsSync(logDirectory)) { if (!existsSync(logDirectory)) {
fs.mkdirSync(logDirectory) mkdirSync(logDirectory)
} }
fs.writeFileSync(logPath, message) writeFileSync(logPath, message)
} else { } else {
const logFile = fs.createWriteStream(logPath, { const logFile = createWriteStream(logPath, {
flags: 'a', flags: 'a',
}) })
logFile.write(util.format(message) + '\n') logFile.write(util.format(message) + '\n')
@ -140,3 +156,12 @@ const writeLog = (message: string, logPath: string) => {
console.debug(message) console.debug(message)
} }
} }
/**
* Register logger for global application logging
*/
export const registerLogger = () => {
const logger = new FileLogger()
LoggerManager.instance().register(logger)
logger.cleanLogs()
}

View File

@ -63,7 +63,7 @@ export default class CortexConversationalExtension extends ConversationalExtensi
async modifyThread(thread: Thread): Promise<void> { async modifyThread(thread: Thread): Promise<void> {
return this.queue return this.queue
.add(() => .add(() =>
ky.post(`${API_URL}/v1/threads/${thread.id}`, { json: thread }) ky.patch(`${API_URL}/v1/threads/${thread.id}`, { json: thread })
) )
.then() .then()
} }
@ -101,7 +101,7 @@ export default class CortexConversationalExtension extends ConversationalExtensi
async modifyMessage(message: ThreadMessage): Promise<ThreadMessage> { async modifyMessage(message: ThreadMessage): Promise<ThreadMessage> {
return this.queue.add(() => return this.queue.add(() =>
ky ky
.post( .patch(
`${API_URL}/v1/threads/${message.thread_id}/messages/${message.id}`, `${API_URL}/v1/threads/${message.thread_id}/messages/${message.id}`,
{ {
json: message, json: message,

View File

@ -6,6 +6,8 @@ import groq from './resources/groq.json' with { type: 'json' }
import martian from './resources/martian.json' with { type: 'json' } import martian from './resources/martian.json' with { type: 'json' }
import mistral from './resources/mistral.json' with { type: 'json' } import mistral from './resources/mistral.json' with { type: 'json' }
import nvidia from './resources/nvidia.json' with { type: 'json' } import nvidia from './resources/nvidia.json' with { type: 'json' }
import deepseek from './resources/deepseek.json' with { type: 'json' }
import googleGemini from './resources/google_gemini.json' with { type: 'json' }
import anthropicModels from './models/anthropic.json' with { type: 'json' } import anthropicModels from './models/anthropic.json' with { type: 'json' }
import cohereModels from './models/cohere.json' with { type: 'json' } import cohereModels from './models/cohere.json' with { type: 'json' }
@ -15,6 +17,8 @@ import groqModels from './models/groq.json' with { type: 'json' }
import martianModels from './models/martian.json' with { type: 'json' } import martianModels from './models/martian.json' with { type: 'json' }
import mistralModels from './models/mistral.json' with { type: 'json' } import mistralModels from './models/mistral.json' with { type: 'json' }
import nvidiaModels from './models/nvidia.json' with { type: 'json' } import nvidiaModels from './models/nvidia.json' with { type: 'json' }
import deepseekModels from './models/deepseek.json' with { type: 'json' }
import googleGeminiModels from './models/google_gemini.json' with { type: 'json' }
const engines = [ const engines = [
anthropic, anthropic,
@ -25,6 +29,8 @@ const engines = [
mistral, mistral,
martian, martian,
nvidia, nvidia,
deepseek,
googleGemini,
] ]
const models = [ const models = [
...anthropicModels, ...anthropicModels,
@ -35,5 +41,7 @@ const models = [
...mistralModels, ...mistralModels,
...martianModels, ...martianModels,
...nvidiaModels, ...nvidiaModels,
...deepseekModels,
...googleGeminiModels,
] ]
export { engines, models } export { engines, models }

View File

@ -8,6 +8,7 @@
"inference_params": { "inference_params": {
"max_tokens": 4096, "max_tokens": 4096,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"stream": true "stream": true
}, },
"engine": "anthropic" "engine": "anthropic"
@ -21,6 +22,7 @@
"inference_params": { "inference_params": {
"max_tokens": 8192, "max_tokens": 8192,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"stream": true "stream": true
}, },
"engine": "anthropic" "engine": "anthropic"
@ -34,6 +36,7 @@
"inference_params": { "inference_params": {
"max_tokens": 8192, "max_tokens": 8192,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"stream": true "stream": true
}, },
"engine": "anthropic" "engine": "anthropic"

View File

@ -8,6 +8,7 @@
"inference_params": { "inference_params": {
"max_tokens": 4096, "max_tokens": 4096,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"stream": false "stream": false
}, },
"engine": "cohere" "engine": "cohere"
@ -21,6 +22,7 @@
"inference_params": { "inference_params": {
"max_tokens": 4096, "max_tokens": 4096,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"stream": false "stream": false
}, },
"engine": "cohere" "engine": "cohere"

View File

@ -0,0 +1,28 @@
[
{
"model": "deepseek-chat",
"object": "model",
"name": "DeepSeek Chat",
"version": "1.0",
"description": "The deepseek-chat model has been upgraded to DeepSeek-V3. deepseek-reasoner points to the new model DeepSeek-R1",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "deepseek"
},
{
"model": "deepseek-reasoner",
"object": "model",
"name": "DeepSeek R1",
"version": "1.0",
"description": "CoT (Chain of Thought) is the reasoning content deepseek-reasoner gives before output the final answer. For details, please refer to Reasoning Model.",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "deepseek"
}
]

View File

@ -0,0 +1,67 @@
[
{
"model": "gemini-2.0-flash",
"object": "model",
"name": "Gemini 2.0 Flash",
"version": "1.0",
"description": "Gemini 2.0 Flash delivers next-gen features and improved capabilities, including superior speed, native tool use, multimodal generation, and a 1M token context window.",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "google_gemini"
},
{
"model": "gemini-2.0-flash-lite-preview",
"object": "model",
"name": "Gemini 2.0 Flash-Lite Preview",
"version": "1.0",
"description": "A Gemini 2.0 Flash model optimized for cost efficiency and low latency.",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "google_gemini"
},
{
"model": "gemini-1.5-flash",
"object": "model",
"name": "Gemini 1.5 Flash",
"version": "1.0",
"description": "Gemini 1.5 Flash is a fast and versatile multimodal model for scaling across diverse tasks.",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "google_gemini"
},
{
"model": "gemini-1.5-flash-8b",
"object": "model",
"name": "Gemini 1.5 Flash-8B",
"version": "1.0",
"description": "Gemini 1.5 Flash-8B is a small model designed for lower intelligence tasks.",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "google_gemini"
},
{
"model": "gemini-1.5-pro",
"object": "model",
"name": "Gemini 1.5 Pro",
"version": "1.0",
"description": "Gemini 1.5 Pro is a mid-size multimodal model that is optimized for a wide-range of reasoning tasks. 1.5 Pro can process large amounts of data at once, including 2 hours of video, 19 hours of audio, codebases with 60,000 lines of code, or 2,000 pages of text. ",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "google_gemini"
}
]

View File

@ -8,6 +8,7 @@
"inference_params": { "inference_params": {
"max_tokens": 32000, "max_tokens": 32000,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"top_p": 0.95, "top_p": 0.95,
"stream": true "stream": true
}, },
@ -22,6 +23,7 @@
"inference_params": { "inference_params": {
"max_tokens": 32000, "max_tokens": 32000,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"top_p": 0.95, "top_p": 0.95,
"stream": true "stream": true
}, },
@ -36,6 +38,7 @@
"inference_params": { "inference_params": {
"max_tokens": 32000, "max_tokens": 32000,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"top_p": 0.95, "top_p": 0.95,
"stream": true "stream": true
}, },

View File

@ -8,6 +8,7 @@
"inference_params": { "inference_params": {
"max_tokens": 1024, "max_tokens": 1024,
"temperature": 0.3, "temperature": 0.3,
"max_temperature": 1.0,
"top_p": 1, "top_p": 1,
"stream": false, "stream": false,
"frequency_penalty": 0, "frequency_penalty": 0,

View File

@ -79,12 +79,7 @@
"description": "OpenAI o1 is a new model with complex reasoning", "description": "OpenAI o1 is a new model with complex reasoning",
"format": "api", "format": "api",
"inference_params": { "inference_params": {
"max_tokens": 100000, "max_tokens": 100000
"temperature": 1,
"top_p": 1,
"stream": true,
"frequency_penalty": 0,
"presence_penalty": 0
}, },
"engine": "openai" "engine": "openai"
}, },
@ -97,11 +92,7 @@
"format": "api", "format": "api",
"inference_params": { "inference_params": {
"max_tokens": 32768, "max_tokens": 32768,
"temperature": 1, "stream": true
"top_p": 1,
"stream": true,
"frequency_penalty": 0,
"presence_penalty": 0
}, },
"engine": "openai" "engine": "openai"
}, },
@ -114,11 +105,20 @@
"format": "api", "format": "api",
"inference_params": { "inference_params": {
"max_tokens": 65536, "max_tokens": 65536,
"temperature": 1, "stream": true
"top_p": 1, },
"stream": true, "engine": "openai"
"frequency_penalty": 0, },
"presence_penalty": 0 {
"model": "o3-mini",
"object": "model",
"name": "OpenAI o3-mini",
"version": "1.0",
"description": "OpenAI most recent reasoning model, providing high intelligence at the same cost and latency targets of o1-mini.",
"format": "api",
"inference_params": {
"max_tokens": 100000,
"stream": true
}, },
"engine": "openai" "engine": "openai"
} }

View File

@ -1,16 +1,91 @@
[ [
{ {
"model": "open-router-auto", "model": "deepseek/deepseek-r1:free",
"object": "model", "object": "model",
"name": "OpenRouter", "name": "DeepSeek: R1",
"version": "1.0", "version": "1.0",
"description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.", "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": { "inference_params": {
"max_tokens": 128000,
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0,
"stream": true
},
"engine": "openrouter"
},
{
"model": "deepseek/deepseek-r1-distill-llama-70b:free",
"object": "model",
"name": "DeepSeek: R1 Distill Llama 70B",
"version": "1.0",
"description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true
},
"engine": "openrouter"
},
{
"model": "deepseek/deepseek-r1-distill-llama-70b:free",
"object": "model",
"name": "DeepSeek: R1 Distill Llama 70B",
"version": "1.0",
"description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true
},
"engine": "openrouter"
},
{
"model": "meta-llama/llama-3.1-405b-instruct:free",
"object": "model",
"name": "Meta: Llama 3.1 405B Instruct",
"version": "1.0",
"description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true
},
"engine": "openrouter"
},
{
"model": "qwen/qwen-vl-plus:free",
"object": "model",
"name": "Qwen: Qwen VL Plus",
"version": "1.0",
"description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true
},
"engine": "openrouter"
},
{
"model": "qwen/qwen2.5-vl-72b-instruct:free",
"object": "model",
"name": "Qwen: Qwen2.5 VL 72B Instruct",
"version": "1.0",
"description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true
}, },
"engine": "openrouter" "engine": "openrouter"
} }

View File

@ -29,12 +29,10 @@
}, },
"dependencies": { "dependencies": {
"@janhq/core": "../../core/package.tgz", "@janhq/core": "../../core/package.tgz",
"cpu-instructions": "^0.0.13",
"ky": "^1.7.2", "ky": "^1.7.2",
"p-queue": "^8.0.1" "p-queue": "^8.0.1"
}, },
"bundledDependencies": [ "bundledDependencies": [
"cpu-instructions",
"@janhq/core" "@janhq/core"
], ],
"engines": { "engines": {

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-anthropic-extension", "id": "anthropic",
"type": "remote", "type": "remote",
"engine": "anthropic", "engine": "anthropic",
"url": "https://console.anthropic.com/settings/keys", "url": "https://console.anthropic.com/settings/keys",
@ -10,13 +10,14 @@
"transform_req": { "transform_req": {
"chat_completions": { "chat_completions": {
"url": "https://api.anthropic.com/v1/messages", "url": "https://api.anthropic.com/v1/messages",
"template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": \"{{ input_request.messages.0.content }}\", \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": \"{{ message.role}}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }" "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": {{ tojson(input_request.messages.0.content) }}, \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"metadata\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
} }
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": \"{{ input_request.model }}\", \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"{{ input_request.role }}\", \"content\": {% if input_request.content and input_request.content.0.type == \"text\" %} \"{{input_request.content.0.text}}\" {% else %} null {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.stop_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.usage.input_tokens }}, \"completion_tokens\": {{ input_request.usage.output_tokens }}, \"total_tokens\": {{ input_request.usage.input_tokens + input_request.usage.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 }, \"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}" "template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {{tojson(input_request)}} {% endif %}"
}
} }
},
"explore_models_url": "https://docs.anthropic.com/en/docs/about-claude/models"
} }
} }

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-cohere-extension", "id": "cohere",
"type": "remote", "type": "remote",
"engine": "cohere", "engine": "cohere",
"url": "https://dashboard.cohere.com/api-keys", "url": "https://dashboard.cohere.com/api-keys",
@ -10,13 +10,14 @@
"transform_req": { "transform_req": {
"chat_completions": { "chat_completions": {
"url": "https://api.cohere.ai/v1/chat", "url": "https://api.cohere.ai/v1/chat",
"template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"preamble\": \"{{ input_request.messages.0.content }}\", {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_first and not loop.is_last %} {\"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}], {% endif %} \"message\": \"{{ last(input_request.messages).content }}\" {% else %} {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_last %} { \"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}],{% endif %}\"message\": \"{{ last(input_request.messages).content }}\" {% endif %}{% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} \"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }" "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"preamble\": {{ tojson(input_request.messages.0.content) }}, {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_first and not loop.is_last %} {\"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}], {% endif %} \"message\": \"{{ last(input_request.messages).content }}\" {% else %} {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_last %} { \"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}],{% endif %}\"message\": \"{{ last(input_request.messages).content }}\" {% endif %}{% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or 
key == \"parallel_tool_calls\" or key == \"user\" %} \"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
} }
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.event_type == \"text-generation\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.text }}\" {% else %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.event_type == \"stream-end\" %} \"finish_reason\": \"{{ input_request.finish_reason }}\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.generation_id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": {% if input_request.model %} \"{{ input_request.model }}\" {% else %} \"command-r-plus-08-2024\" {% endif %}, \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"assistant\", \"content\": {% if not input_request.text %} null {% else %} \"{{ input_request.text }}\" {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.finish_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.meta.tokens.input_tokens }}, \"completion_tokens\": {{ input_request.meta.tokens.output_tokens }},\"total_tokens\": {{ input_request.meta.tokens.input_tokens + input_request.meta.tokens.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 },\"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}" "template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.event_type == \"text-generation\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.text }}\" {% else %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.event_type == \"stream-end\" %} \"finish_reason\": \"{{ input_request.finish_reason }}\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.generation_id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": {% if input_request.model %} \"{{ input_request.model }}\" {% else %} \"command-r-plus-08-2024\" {% endif %}, \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"assistant\", \"content\": {% if not input_request.text %} null {% else %} \"{{ input_request.text }}\" {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.finish_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.meta.tokens.input_tokens }}, \"completion_tokens\": {{ input_request.meta.tokens.output_tokens }},\"total_tokens\": {{ input_request.meta.tokens.input_tokens + input_request.meta.tokens.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 },\"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}"
} }
} },
"explore_models_url": "https://docs.cohere.com/v2/docs/models"
} }
} }

View File

@ -0,0 +1,23 @@
{
"id": "deepseek",
"type": "remote",
"engine": "deepseek",
"url": "https://platform.deepseek.com/api_keys",
"api_key": "",
"metadata": {
"get_models_url": "https://api.deepseek.com/models",
"header_template": "Authorization: Bearer {{api_key}}",
"transform_req": {
"chat_completions": {
"url": "https://api.deepseek.com/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://api-docs.deepseek.com/quick_start/pricing"
}
}

View File

@ -0,0 +1,23 @@
{
"id": "google_gemini",
"type": "remote",
"engine": "google_gemini",
"url": "https://aistudio.google.com/apikey",
"api_key": "",
"metadata": {
"get_models_url": "https://generativelanguage.googleapis.com/v1beta/models",
"header_template": "Authorization: Bearer {{api_key}}",
"transform_req": {
"chat_completions": {
"url": "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://ai.google.dev/gemini-api/docs/models/gemini"
}
}

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-groq-extension", "id": "groq",
"type": "remote", "type": "remote",
"engine": "groq", "engine": "groq",
"url": "https://console.groq.com/keys", "url": "https://console.groq.com/keys",
@ -15,8 +15,9 @@
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{{tojson(input_request)}}"
}
} }
},
"explore_models_url": "https://console.groq.com/docs/models"
} }
} }

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-martian-extension", "id": "martian",
"type": "remote", "type": "remote",
"engine": "martian", "engine": "martian",
"url": "https://withmartian.com/dashboard", "url": "https://withmartian.com/dashboard",
@ -15,8 +15,9 @@
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{{tojson(input_request)}}"
}
} }
},
"explore_models_url": "https://withmartian.github.io/llm-adapters/"
} }
} }

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-mistral-extension", "id": "mistral",
"type": "remote", "type": "remote",
"engine": "mistral", "engine": "mistral",
"url": "https://console.mistral.ai/api-keys/", "url": "https://console.mistral.ai/api-keys/",
@ -17,6 +17,7 @@
"chat_completions": { "chat_completions": {
"template": "{{tojson(input_request)}}" "template": "{{tojson(input_request)}}"
} }
} },
"explore_models_url": "https://docs.mistral.ai/getting-started/models/models_overview/"
} }
} }

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-nvidia-extension", "id": "nvidia",
"type": "remote", "type": "remote",
"engine": "nvidia", "engine": "nvidia",
"url": "https://org.ngc.nvidia.com/setup/personal-keys", "url": "https://org.ngc.nvidia.com/setup/personal-keys",
@ -15,8 +15,9 @@
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{{tojson(input_request)}}"
}
} }
},
"explore_models_url": "https://build.nvidia.com/models"
} }
} }

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-openai-extension", "id": "openai",
"type": "remote", "type": "remote",
"engine": "openai", "engine": "openai",
"url": "https://platform.openai.com/account/api-keys", "url": "https://platform.openai.com/account/api-keys",
@ -10,13 +10,14 @@
"transform_req": { "transform_req": {
"chat_completions": { "chat_completions": {
"url": "https://api.openai.com/v1/chat/completions", "url": "https://api.openai.com/v1/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or ((input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and (key == \"stop\")) %} {% if not first %} , {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %} , {% endif %} {% endif %} {% endfor %}] {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }" "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or ((input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") and (key == \"stop\")) %} {% if not first %} , {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %} , {% endif %} {% endif %} {% endfor %}] {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% set first = false %} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }"
} }
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{{tojson(input_request)}}"
}
} }
},
"explore_models_url": "https://platform.openai.com/docs/models"
} }
} }

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-openrouter-extension", "id": "openrouter",
"type": "remote", "type": "remote",
"engine": "openrouter", "engine": "openrouter",
"url": "https://openrouter.ai/keys", "url": "https://openrouter.ai/keys",
@ -10,13 +10,14 @@
"transform_req": { "transform_req": {
"chat_completions": { "chat_completions": {
"url": "https://openrouter.ai/api/v1/chat/completions", "url": "https://openrouter.ai/api/v1/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
} }
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{{tojson(input_request)}}"
}
} }
},
"explore_models_url": "https://openrouter.ai/models"
} }
} }

View File

@ -13,9 +13,19 @@ export default defineConfig([
NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`), NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`),
API_URL: JSON.stringify('http://127.0.0.1:39291'), API_URL: JSON.stringify('http://127.0.0.1:39291'),
SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.46'), PLATFORM: JSON.stringify(process.platform),
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'),
DEFAULT_REMOTE_ENGINES: JSON.stringify(engines), DEFAULT_REMOTE_ENGINES: JSON.stringify(engines),
DEFAULT_REMOTE_MODELS: JSON.stringify(models), DEFAULT_REMOTE_MODELS: JSON.stringify(models),
DEFAULT_REQUEST_PAYLOAD_TRANSFORM: JSON.stringify(
`{ {% set first = true %} {% for key, value in input_request %} {% if key == "messages" or key == "model" or key == "temperature" or key == "store" or key == "max_tokens" or key == "stream" or key == "presence_penalty" or key == "metadata" or key == "frequency_penalty" or key == "tools" or key == "tool_choice" or key == "logprobs" or key == "top_logprobs" or key == "logit_bias" or key == "n" or key == "modalities" or key == "prediction" or key == "response_format" or key == "service_tier" or key == "seed" or key == "stop" or key == "stream_options" or key == "top_p" or key == "parallel_tool_calls" or key == "user" %} {% if not first %},{% endif %} "{{ key }}": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }`
),
DEFAULT_RESPONSE_BODY_TRANSFORM: JSON.stringify(
'{{tojson(input_request)}}'
),
DEFAULT_REQUEST_HEADERS_TRANSFORM: JSON.stringify(
'Authorization: Bearer {{api_key}}'
),
}, },
}, },
{ {
@ -26,18 +36,7 @@ export default defineConfig([
file: 'dist/node/index.cjs.js', file: 'dist/node/index.cjs.js',
}, },
define: { define: {
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.46'), CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'),
},
},
{
input: 'src/node/cpuInfo.ts',
output: {
format: 'cjs',
file: 'dist/node/cpuInfo.js',
},
external: ['cpu-instructions'],
resolve: {
extensions: ['.ts', '.js', '.svg'],
}, },
}, },
]) ])

View File

@ -1,7 +1,11 @@
declare const API_URL: string declare const API_URL: string
declare const CORTEX_ENGINE_VERSION: string declare const CORTEX_ENGINE_VERSION: string
declare const PLATFORM: string
declare const SOCKET_URL: string declare const SOCKET_URL: string
declare const NODE: string declare const NODE: string
declare const DEFAULT_REQUEST_PAYLOAD_TRANSFORM: string
declare const DEFAULT_RESPONSE_BODY_TRANSFORM: string
declare const DEFAULT_REQUEST_HEADERS_TRANSFORM: string
declare const DEFAULT_REMOTE_ENGINES: ({ declare const DEFAULT_REMOTE_ENGINES: ({
id: string id: string

View File

@ -19,12 +19,16 @@ import ky, { HTTPError } from 'ky'
import PQueue from 'p-queue' import PQueue from 'p-queue'
import { EngineError } from './error' import { EngineError } from './error'
import { getJanDataFolderPath } from '@janhq/core' import { getJanDataFolderPath } from '@janhq/core'
import { engineVariant } from './utils'
interface ModelList {
data: Model[]
}
/** /**
* JSONEngineManagementExtension is an EngineManagementExtension implementation that provides * JanEngineManagementExtension is an EngineManagementExtension implementation that provides
* functionality for managing engines. * functionality for managing engines.
*/ */
export default class JSONEngineManagementExtension extends EngineManagementExtension { export default class JanEngineManagementExtension extends EngineManagementExtension {
queue = new PQueue({ concurrency: 1 }) queue = new PQueue({ concurrency: 1 })
/** /**
@ -63,13 +67,12 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
* @returns A Promise that resolves to an object of list engines. * @returns A Promise that resolves to an object of list engines.
*/ */
async getRemoteModels(name: string): Promise<any> { async getRemoteModels(name: string): Promise<any> {
return this.queue.add(() => return ky
ky
.get(`${API_URL}/v1/models/remote/${name}`) .get(`${API_URL}/v1/models/remote/${name}`)
.json<Model[]>() .json<ModelList>()
.then((e) => e) .catch(() => ({
.catch(() => []) data: [],
) as Promise<Model[]> })) as Promise<ModelList>
} }
/** /**
@ -138,9 +141,38 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
* Add a new remote engine * Add a new remote engine
* @returns A Promise that resolves once the engine is installed. * @returns A Promise that resolves once the engine is installed.
*/ */
async addRemoteEngine(engineConfig: EngineConfig) { async addRemoteEngine(
engineConfig: EngineConfig,
persistModels: boolean = true
) {
// Populate default settings
if (
engineConfig.metadata?.transform_req?.chat_completions &&
!engineConfig.metadata.transform_req.chat_completions.template
)
engineConfig.metadata.transform_req.chat_completions.template =
DEFAULT_REQUEST_PAYLOAD_TRANSFORM
if (
engineConfig.metadata?.transform_resp?.chat_completions &&
!engineConfig.metadata.transform_resp.chat_completions?.template
)
engineConfig.metadata.transform_resp.chat_completions.template =
DEFAULT_RESPONSE_BODY_TRANSFORM
if (engineConfig.metadata && !engineConfig.metadata?.header_template)
engineConfig.metadata.header_template = DEFAULT_REQUEST_HEADERS_TRANSFORM
return this.queue.add(() => return this.queue.add(() =>
ky.post(`${API_URL}/v1/engines`, { json: engineConfig }).then((e) => e) ky.post(`${API_URL}/v1/engines`, { json: engineConfig }).then((e) => {
if (persistModels && engineConfig.metadata?.get_models_url) {
// Pull /models from remote models endpoint
return this.populateRemoteModels(engineConfig)
.then(() => e)
.catch(() => e)
}
return e
})
) as Promise<{ messages: string }> ) as Promise<{ messages: string }>
} }
@ -161,9 +193,25 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
* @param model - Remote model object. * @param model - Remote model object.
*/ */
async addRemoteModel(model: Model) { async addRemoteModel(model: Model) {
return this.queue.add(() => return this.queue
ky.post(`${API_URL}/v1/models/add`, { json: model }).then((e) => e) .add(() =>
ky
.post(`${API_URL}/v1/models/add`, {
json: {
inference_params: {
max_tokens: 4096,
temperature: 0.7,
top_p: 0.95,
stream: true,
frequency_penalty: 0,
presence_penalty: 0,
},
...model,
},
})
.then((e) => e)
) )
.then(() => {})
} }
/** /**
@ -245,11 +293,7 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
error instanceof EngineError error instanceof EngineError
) { ) {
const systemInfo = await systemInformation() const systemInfo = await systemInformation()
const variant = await executeOnMain( const variant = await engineVariant(systemInfo.gpuSetting)
NODE,
'engineVariant',
systemInfo.gpuSetting
)
await this.setDefaultEngineVariant(InferenceEngine.cortex_llamacpp, { await this.setDefaultEngineVariant(InferenceEngine.cortex_llamacpp, {
variant: variant, variant: variant,
version: `${CORTEX_ENGINE_VERSION}`, version: `${CORTEX_ENGINE_VERSION}`,
@ -293,14 +337,40 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
data.api_key = api_key data.api_key = api_key
/// END - Migrate legacy api key settings /// END - Migrate legacy api key settings
await this.addRemoteEngine(data).catch(console.error) await this.addRemoteEngine(data, false).catch(console.error)
}) })
) )
events.emit(EngineEvent.OnEngineUpdate, {}) events.emit(EngineEvent.OnEngineUpdate, {})
DEFAULT_REMOTE_MODELS.forEach(async (data: Model) => { await Promise.all(
await this.addRemoteModel(data).catch(() => {}) DEFAULT_REMOTE_MODELS.map((data: Model) =>
}) this.addRemoteModel(data).catch(() => {})
)
)
events.emit(ModelEvent.OnModelsUpdate, { fetch: true }) events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
} }
} }
/**
* Pulls models list from the remote provider and persist
* @param engineConfig
* @returns
*/
private populateRemoteModels = async (engineConfig: EngineConfig) => {
return this.getRemoteModels(engineConfig.engine)
.then((models: ModelList) => {
if (models?.data)
Promise.all(
models.data.map((model) =>
this.addRemoteModel({
...model,
engine: engineConfig.engine as InferenceEngine,
model: model.model ?? model.id,
}).catch(console.info)
)
).then(() => {
events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
})
})
.catch(console.info)
}
} }

View File

@ -1,27 +0,0 @@
import { cpuInfo } from 'cpu-instructions'
// Check the CPU info and determine the supported instruction set
const info = cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512')
? 'avx512'
: cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX2')
? 'avx2'
: cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX')
? 'avx'
: 'noavx'
// Send the result and wait for confirmation before exiting
new Promise<void>((resolve, reject) => {
// @ts-ignore
process.send(info, (error: Error | null) => {
if (error) {
reject(error)
} else {
resolve()
}
})
})
.then(() => process.exit(0))
.catch((error) => {
console.error('Failed to send info:', error)
process.exit(1)
})

View File

@ -1,7 +1,6 @@
import { describe, expect, it } from '@jest/globals' import { describe, expect, it } from '@jest/globals'
import engine from './index' import engine from './index'
import { GpuSetting } from '@janhq/core/node' import { GpuSetting } from '@janhq/core'
import { cpuInfo } from 'cpu-instructions'
import { fork } from 'child_process' import { fork } from 'child_process'
let testSettings: GpuSetting = { let testSettings: GpuSetting = {
@ -23,22 +22,12 @@ let testSettings: GpuSetting = {
} }
const originalPlatform = process.platform const originalPlatform = process.platform
jest.mock('cpu-instructions', () => ({
cpuInfo: {
cpuInfo: jest.fn(),
},
}))
let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock
mockCpuInfo.mockReturnValue([])
jest.mock('@janhq/core/node', () => ({
jest.mock('@janhq/core', () => ({
appResourcePath: () => '.', appResourcePath: () => '.',
log: jest.fn(), log: jest.fn(),
})) }))
jest.mock('child_process', () => ({
fork: jest.fn(),
}))
const mockFork = fork as jest.Mock
describe('test executable cortex file', () => { describe('test executable cortex file', () => {
afterAll(function () { afterAll(function () {
@ -48,14 +37,7 @@ describe('test executable cortex file', () => {
}) })
it('executes on MacOS', () => { it('executes on MacOS', () => {
const mockProcess = {
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
}
Object.defineProperty(process, 'platform', { Object.defineProperty(process, 'platform', {
value: 'darwin', value: 'darwin',
}) })
@ -63,7 +45,7 @@ describe('test executable cortex file', () => {
value: 'arm64', value: 'arm64',
}) })
mockFork.mockReturnValue(mockProcess)
expect(engine.engineVariant(testSettings)).resolves.toEqual('mac-arm64') expect(engine.engineVariant(testSettings)).resolves.toEqual('mac-arm64')
}) })
@ -83,7 +65,7 @@ describe('test executable cortex file', () => {
}), }),
send: jest.fn(), send: jest.fn(),
} }
mockFork.mockReturnValue(mockProcess)
Object.defineProperty(process, 'arch', { Object.defineProperty(process, 'arch', {
value: 'x64', value: 'x64',
}) })
@ -107,7 +89,6 @@ describe('test executable cortex file', () => {
}), }),
send: jest.fn(), send: jest.fn(),
} }
mockFork.mockReturnValue(mockProcess)
expect(engine.engineVariant()).resolves.toEqual('windows-amd64-avx') expect(engine.engineVariant()).resolves.toEqual('windows-amd64-avx')
}) })
@ -145,7 +126,6 @@ describe('test executable cortex file', () => {
}), }),
send: jest.fn(), send: jest.fn(),
} }
mockFork.mockReturnValue(mockProcess)
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-avx2-cuda-11-7' 'windows-amd64-avx2-cuda-11-7'
@ -176,26 +156,11 @@ describe('test executable cortex file', () => {
}, },
], ],
} }
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-noavx-cuda-12-0' 'windows-amd64-noavx-cuda-12-0'
) )
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx512')
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-avx2-cuda-12-0' 'windows-amd64-avx2-cuda-12-0'
) )
@ -209,14 +174,6 @@ describe('test executable cortex file', () => {
...testSettings, ...testSettings,
run_mode: 'cpu', run_mode: 'cpu',
} }
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
})
expect(engine.engineVariant()).resolves.toEqual('linux-amd64-noavx') expect(engine.engineVariant()).resolves.toEqual('linux-amd64-noavx')
}) })
@ -245,16 +202,6 @@ describe('test executable cortex file', () => {
}, },
], ],
} }
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx512')
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toBe( expect(engine.engineVariant(settings)).resolves.toBe(
'linux-amd64-avx2-cuda-11-7' 'linux-amd64-avx2-cuda-11-7'
) )
@ -284,14 +231,7 @@ describe('test executable cortex file', () => {
}, },
], ],
} }
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx2')
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
'linux-amd64-avx2-cuda-12-0' 'linux-amd64-avx2-cuda-12-0'
@ -310,15 +250,6 @@ describe('test executable cortex file', () => {
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => { cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-${instruction}` `linux-amd64-${instruction}`
) )
@ -335,14 +266,7 @@ describe('test executable cortex file', () => {
} }
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => { cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
`windows-amd64-${instruction}` `windows-amd64-${instruction}`
) )
@ -376,14 +300,7 @@ describe('test executable cortex file', () => {
} }
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => { cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
`windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0` `windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
) )
@ -417,14 +334,7 @@ describe('test executable cortex file', () => {
], ],
} }
cpuInstructions.forEach((instruction) => { cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0` `linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
) )
@ -459,14 +369,7 @@ describe('test executable cortex file', () => {
], ],
} }
cpuInstructions.forEach((instruction) => { cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-vulkan` `linux-amd64-vulkan`
) )

View File

@ -2,111 +2,10 @@ import * as path from 'path'
import { import {
appResourcePath, appResourcePath,
getJanDataFolderPath, getJanDataFolderPath,
GpuSetting,
log, log,
} from '@janhq/core/node' } from '@janhq/core/node'
import { fork } from 'child_process'
import { mkdir, readdir, symlink } from 'fs/promises' import { mkdir, readdir, symlink } from 'fs/promises'
/**
* The GPU runMode that will be set - either 'vulkan', 'cuda', or empty for cpu.
* @param settings
* @returns
*/
const gpuRunMode = (settings?: GpuSetting): string => {
if (process.platform === 'darwin')
// MacOS now has universal binaries
return ''
if (!settings) return ''
return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda'
}
/**
* The OS & architecture that the current process is running on.
* @returns win, mac-x64, mac-arm64, or linux
*/
const os = (): string => {
return process.platform === 'win32'
? 'windows-amd64'
: process.platform === 'darwin'
? process.arch === 'arm64'
? 'mac-arm64'
: 'mac-amd64'
: 'linux-amd64'
}
/**
* The CUDA version that will be set - either '11-7' or '12-0'.
* @param settings
* @returns
*/
const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
const isUsingCuda =
settings?.vulkan !== true &&
settings?.run_mode === 'gpu' &&
!os().includes('mac')
if (!isUsingCuda) return undefined
return settings?.cuda?.version === '11' ? '11-7' : '12-0'
}
/**
* The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
* @returns
*/
const cpuInstructions = async (): Promise<string> => {
if (process.platform === 'darwin') return ''
const child = fork(path.join(__dirname, './cpuInfo.js')) // Path to the child process file
return new Promise((resolve, reject) => {
child.on('message', (cpuInfo?: string) => {
resolve(cpuInfo ?? 'noavx')
child.kill() // Kill the child process after receiving the result
})
child.on('error', (err) => {
resolve('noavx')
child.kill()
})
child.on('exit', (code) => {
if (code !== 0) {
resolve('noavx')
child.kill()
}
})
})
}
/**
* Find which variant to run based on the current platform.
*/
const engineVariant = async (gpuSetting?: GpuSetting): Promise<string> => {
const cpuInstruction = await cpuInstructions()
log(`[CORTEX]: CPU instruction: ${cpuInstruction}`)
let engineVariant = [
os(),
gpuSetting?.vulkan
? 'vulkan'
: gpuRunMode(gpuSetting) !== 'cuda'
? // CPU mode - support all variants
cpuInstruction
: // GPU mode - packaged CUDA variants of avx2 and noavx
cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
? 'avx2'
: 'noavx',
gpuRunMode(gpuSetting),
cudaVersion(gpuSetting),
]
.filter((e) => !!e)
.join('-')
log(`[CORTEX]: Engine variant: ${engineVariant}`)
return engineVariant
}
/** /**
* Create symlink to each variant for the default bundled version * Create symlink to each variant for the default bundled version
@ -148,6 +47,5 @@ const symlinkEngines = async () => {
} }
export default { export default {
engineVariant,
symlinkEngines, symlinkEngines,
} }

View File

@ -0,0 +1,86 @@
import { GpuSetting, log } from '@janhq/core'
/**
* The GPU run mode that will be set - either 'cuda' or empty for CPU.
* @param settings
* @returns
*/
const gpuRunMode = (settings?: GpuSetting): string => {
return settings?.gpus?.some(
(gpu) =>
gpu.activated === true &&
gpu.additional_information &&
gpu.additional_information.driver_version
)
? 'cuda'
: ''
}
/**
* The OS & architecture that the current process is running on.
* @returns windows-amd64, mac-amd64, mac-arm64, or linux-amd64
*/
const os = (settings?: GpuSetting): string => {
return PLATFORM === 'win32'
? 'windows-amd64'
: PLATFORM === 'darwin'
? settings?.cpu?.arch === 'arm64'
? 'mac-arm64'
: 'mac-amd64'
: 'linux-amd64'
}
/**
* The CUDA version that will be set - either '11-7' or '12-0'.
* @param settings
* @returns
*/
const cudaVersion = (settings?: GpuSetting): '12-0' | '11-7' | undefined => {
const isUsingCuda =
settings?.vulkan !== true &&
settings?.gpus?.some((gpu) => gpu.activated === true) &&
!os().includes('mac')
if (!isUsingCuda) return undefined
// return settings?.cuda?.version === '11' ? '11-7' : '12-0'
return settings.gpus?.some((gpu) => gpu.version.includes('12'))
? '12-0'
: '11-7'
}
/**
 * Find which variant to run based on the current platform, the reported CPU
 * instruction set ('avx512', 'avx2', 'avx', or 'noavx'), and the GPU setting.
 */
export const engineVariant = async (
gpuSetting?: GpuSetting
): Promise<string> => {
const platform = os(gpuSetting)
// There is no need to append the variant extension for mac
if (platform.startsWith('mac')) return platform
let engineVariant =
gpuSetting?.vulkan || gpuSetting?.gpus?.some((e) => !e.additional_information)
? [platform, 'vulkan']
: [
platform,
gpuRunMode(gpuSetting) === 'cuda' &&
(gpuSetting?.cpu?.instructions?.includes('avx2') ||
gpuSetting?.cpu?.instructions?.includes('avx512'))
? 'avx2'
: 'noavx',
gpuRunMode(gpuSetting),
cudaVersion(gpuSetting),
].filter(Boolean) // Remove any falsy values
let engineVariantString = engineVariant.join('-')
log(`[CORTEX]: Engine variant: ${engineVariantString}`)
return engineVariantString
}
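
To make the mapping concrete, here is a hedged sketch of the variant strings the function above would resolve to, assuming the build-time `PLATFORM` constant is `'linux'` and the simplified, hypothetical `GpuSetting` literals below (field names follow the code above; the concrete values are illustrative only):

```typescript
// Hypothetical settings; cast to `any` because only the fields read by
// engineVariant() are filled in here.
const cudaSetting = {
  vulkan: false,
  cpu: { arch: 'x64', instructions: ['avx2'] },
  gpus: [
    {
      activated: true,
      version: '12.0',
      additional_information: { driver_version: '550.00' },
    },
  ],
} as any

const cpuOnlySetting = {
  vulkan: false,
  cpu: { arch: 'x64', instructions: ['avx'] },
  gpus: [],
} as any

// With PLATFORM === 'linux', these would resolve to roughly:
//   engineVariant(cudaSetting)    -> 'linux-amd64-avx2-cuda-12-0'
//   engineVariant(cpuOnlySetting) -> 'linux-amd64-noavx'
```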

View File

@ -0,0 +1,5 @@
/** @type {import('ts-jest').JestConfigWithTsJest} */
module.exports = {
preset: 'ts-jest',
testEnvironment: 'node',
}

View File

@ -0,0 +1,48 @@
{
"name": "@janhq/hardware-management-extension",
"productName": "Hardware Management",
"version": "1.0.0",
"description": "Manages Better Hardware settings.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>",
"license": "MIT",
"scripts": {
"test": "jest",
"build": "rolldown -c rolldown.config.mjs",
"codesign:darwin": "../../.github/scripts/auto-sign.sh",
"codesign:win32:linux": "echo 'No codesigning required'",
"codesign": "run-script-os",
"build:publish": "rimraf *.tgz --glob || true && yarn build && yarn codesign && npm pack && cpx *.tgz ../../pre-install"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/module.js"
},
"devDependencies": {
"cpx": "^1.5.0",
"rimraf": "^3.0.2",
"rolldown": "^1.0.0-beta.1",
"run-script-os": "^1.1.6",
"ts-loader": "^9.5.0",
"typescript": "^5.3.3"
},
"dependencies": {
"@janhq/core": "../../core/package.tgz",
"cpu-instructions": "^0.0.13",
"ky": "^1.7.2",
"p-queue": "^8.0.1"
},
"bundledDependencies": [
"cpu-instructions",
"@janhq/core"
],
"hardwares": {
"node": ">=18.0.0"
},
"files": [
"dist/*",
"package.json",
"README.md"
]
}

View File

@ -0,0 +1,17 @@
import { defineConfig } from 'rolldown'
import pkgJson from './package.json' with { type: 'json' }
export default defineConfig([
{
input: 'src/index.ts',
output: {
format: 'esm',
file: 'dist/index.js',
},
define: {
NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`),
API_URL: JSON.stringify('http://127.0.0.1:39291'),
SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
},
},
])

View File

@ -0,0 +1,12 @@
declare const API_URL: string
declare const SOCKET_URL: string
declare const NODE: string
interface Core {
api: APIFunctions
events: EventEmitter
}
interface Window {
core?: Core | undefined
electronAPI?: any | undefined
}
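
Taken together, the `define` block in the rolldown config above and these ambient declarations let the extension code reference `API_URL`, `SOCKET_URL`, and `NODE` as plain globals: the declarations satisfy the TypeScript compiler, and the bundler substitutes each identifier with its literal at build time. A small sketch of the idea, using the `healthz` call that appears later in this extension:

```typescript
import ky from 'ky'

// Declared in global.d.ts; injected as a string literal by rolldown's `define`.
declare const API_URL: string

// As written in the extension source:
export const healthz = () => ky.get(`${API_URL}/healthz`)

// After bundling, the identifier is replaced by the configured literal, roughly:
//   ky.get(`${'http://127.0.0.1:39291'}/healthz`)
```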

View File

@ -0,0 +1,67 @@
import {
HardwareManagementExtension,
HardwareInformation,
} from '@janhq/core'
import ky from 'ky'
import PQueue from 'p-queue'
/**
* JSONHardwareManagementExtension is a HardwareManagementExtension implementation that provides
* functionality for managing hardware.
*/
export default class JSONHardwareManagementExtension extends HardwareManagementExtension {
queue = new PQueue({ concurrency: 1 })
/**
* Called when the extension is loaded.
*/
async onLoad() {
// Run Healthcheck
this.queue.add(() => this.healthz())
}
/**
* Called when the extension is unloaded.
*/
onUnload() {}
/**
* Do health check on cortex.cpp
* @returns
*/
async healthz(): Promise<void> {
return ky
.get(`${API_URL}/healthz`, {
retry: { limit: 20, delay: () => 500, methods: ['get'] },
})
.then(() => {})
}
/**
* @returns A Promise that resolves to the system's hardware information.
*/
async getHardware(): Promise<HardwareInformation> {
return this.queue.add(() =>
ky
.get(`${API_URL}/v1/hardware`)
.json<HardwareInformation>()
.then((e) => e)
) as Promise<HardwareInformation>
}
/**
* @returns A Promise that resolves to the result of the GPU activation request.
*/
async setAvtiveGpu(data: { gpus: number[] }): Promise<{
message: string
activated_gpus: number[]
}> {
return this.queue.add(() =>
ky.post(`${API_URL}/v1/hardware/activate`, { json: data }).then((e) => e)
) as Promise<{
message: string
activated_gpus: number[]
}>
}
}
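
For orientation, a hedged usage sketch of this extension, assuming `hw` is the loaded instance (resolution through Jan's extension manager is outside this diff) and that GPU indices `0` and `1` exist on the machine:

```typescript
// Hypothetical wiring: `hw` stands in for the loaded extension instance.
declare const hw: JSONHardwareManagementExtension

async function listAndActivateGpus() {
  await hw.healthz()                      // wait until cortex.cpp answers /healthz
  const info = await hw.getHardware()     // GET /v1/hardware
  console.log(info)
  await hw.setAvtiveGpu({ gpus: [0, 1] }) // POST /v1/hardware/activate
}
```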

View File

@ -8,7 +8,9 @@
"forceConsistentCasingInFileNames": true, "forceConsistentCasingInFileNames": true,
"strict": false, "strict": false,
"skipLibCheck": true, "skipLibCheck": true,
"rootDir": "./src" "rootDir": "./src",
"resolveJsonModule": true
}, },
"include": ["./src"] "include": ["./src"],
"exclude": ["src/**/*.test.ts", "rolldown.config.mjs"]
} }

View File

@ -1 +1 @@
1.0.9-rc4 1.0.10

View File

@ -1,7 +1,7 @@
{ {
"name": "@janhq/inference-cortex-extension", "name": "@janhq/inference-cortex-extension",
"productName": "Cortex Inference Engine", "productName": "Cortex Inference Engine",
"version": "1.0.24", "version": "1.0.25",
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"main": "dist/index.js", "main": "dist/index.js",
"node": "dist/node/index.cjs.js", "node": "dist/node/index.cjs.js",

View File

@ -76,7 +76,7 @@
}, },
{ {
"key": "use_mmap", "key": "use_mmap",
"title": "MMAP", "title": "mmap",
"description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.", "description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.",
"controllerType": "checkbox", "controllerType": "checkbox",
"controllerProps": { "controllerProps": {

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf"
}
],
"id": "deepseek-r1-distill-llama-70b",
"object": "model",
"name": "DeepSeek R1 Distill Llama 70B Q4",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf",
"ngl": 81
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["70B", "Featured"],
"size": 42500000000
},
"engine": "llama-cpp"
}

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf"
}
],
"id": "deepseek-r1-distill-llama-8b",
"object": "model",
"name": "DeepSeek R1 Distill Llama 8B Q5",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf",
"ngl": 33
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["8B", "Featured"],
"size": 5730000000
},
"engine": "llama-cpp"
}

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-1.5b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 1.5B Q5",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf",
"ngl": 29
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["1.5B", "Featured"],
"size": 1290000000
},
"engine": "llama-cpp"
}

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-14b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 14B Q4",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf",
"ngl": 49
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["14B", "Featured"],
"size": 8990000000
},
"engine": "llama-cpp"
}

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-32b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 32B Q4",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf",
"ngl": 65
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["32B", "Featured"],
"size": 19900000000
},
"engine": "llama-cpp"
}

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-7b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 7B Q5",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf",
"ngl": 29
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["7B", "Featured"],
"size": 5440000000
},
"engine": "llama-cpp"
}
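
For orientation, the `parameters` block in these model manifests maps directly onto an OpenAI-compatible chat-completion payload sent to the local cortex.cpp server. A hedged sketch, assuming the `/v1/chat/completions` route on the `http://127.0.0.1:39291` base URL configured elsewhere in this diff (the route itself is an assumption based on cortex.cpp's OpenAI-compatible API):

```typescript
// Sketch only: the endpoint path and payload wiring are assumptions,
// not part of this diff. Values mirror the "parameters" block above.
const body = {
  model: 'deepseek-r1-distill-qwen-7b',
  messages: [{ role: 'user', content: 'Hello' }],
  temperature: 0.6,
  top_p: 0.95,
  stream: false, // set to true to receive server-sent events instead of one JSON body
  max_tokens: 131072,
  stop: [],
  frequency_penalty: 0,
  presence_penalty: 0,
}

fetch('http://127.0.0.1:39291/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(body),
})
  .then((res) => res.json())
  .then(console.log)
  .catch(console.error)
```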

View File

@ -22,19 +22,13 @@
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 8192, "max_tokens": 8192,
"stop": [ "stop": ["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
"<|end_of_text|>",
"<|eot_id|>",
"<|eom_id|>"
],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0
}, },
"metadata": { "metadata": {
"author": "MetaAI", "author": "MetaAI",
"tags": [ "tags": ["8B", "Featured"],
"8B", "Featured"
],
"size": 4920000000 "size": 4920000000
}, },
"engine": "llama-cpp" "engine": "llama-cpp"

View File

@ -49,6 +49,13 @@ import qwen2514bJson from './resources/models/qwen2.5-14b-instruct/model.json' w
import qwen2532bJson from './resources/models/qwen2.5-32b-instruct/model.json' with { type: 'json' } import qwen2532bJson from './resources/models/qwen2.5-32b-instruct/model.json' with { type: 'json' }
import qwen2572bJson from './resources/models/qwen2.5-72b-instruct/model.json' with { type: 'json' } import qwen2572bJson from './resources/models/qwen2.5-72b-instruct/model.json' with { type: 'json' }
import deepseekR1DistillQwen_1_5b from './resources/models/deepseek-r1-distill-qwen-1.5b/model.json' with { type: 'json' }
import deepseekR1DistillQwen_7b from './resources/models/deepseek-r1-distill-qwen-7b/model.json' with { type: 'json' }
import deepseekR1DistillQwen_14b from './resources/models/deepseek-r1-distill-qwen-14b/model.json' with { type: 'json' }
import deepseekR1DistillQwen_32b from './resources/models/deepseek-r1-distill-qwen-32b/model.json' with { type: 'json' }
import deepseekR1DistillLlama_8b from './resources/models/deepseek-r1-distill-llama-8b/model.json' with { type: 'json' }
import deepseekR1DistillLlama_70b from './resources/models/deepseek-r1-distill-llama-70b/model.json' with { type: 'json' }
export default defineConfig([ export default defineConfig([
{ {
input: 'src/index.ts', input: 'src/index.ts',
@ -106,12 +113,18 @@ export default defineConfig([
qwen2514bJson, qwen2514bJson,
qwen2532bJson, qwen2532bJson,
qwen2572bJson, qwen2572bJson,
deepseekR1DistillQwen_1_5b,
deepseekR1DistillQwen_7b,
deepseekR1DistillQwen_14b,
deepseekR1DistillQwen_32b,
deepseekR1DistillLlama_8b,
deepseekR1DistillLlama_70b,
]), ]),
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
SETTINGS: JSON.stringify(defaultSettingJson), SETTINGS: JSON.stringify(defaultSettingJson),
CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'), CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.46'), CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'),
}, },
}, },
{ {

View File

@ -112,8 +112,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number
// Run the process watchdog // Run the process watchdog
const systemInfo = await systemInformation() // const systemInfo = await systemInformation()
this.queue.add(() => executeOnMain(NODE, 'run', systemInfo)) this.queue.add(() => executeOnMain(NODE, 'run'))
this.queue.add(() => this.healthz()) this.queue.add(() => this.healthz())
this.subscribeToEvents() this.subscribeToEvents()

View File

@ -16,15 +16,20 @@ let watchdog: ProcessWatchdog | undefined = undefined
* Spawns a Nitro subprocess. * Spawns a Nitro subprocess.
* @returns A promise that resolves when the Nitro subprocess is started. * @returns A promise that resolves when the Nitro subprocess is started.
*/ */
function run(systemInfo?: SystemInformation): Promise<any> { function run(): Promise<any> {
log(`[CORTEX]:: Spawning cortex subprocess...`) log(`[CORTEX]:: Spawning cortex subprocess...`)
return new Promise<void>(async (resolve, reject) => { return new Promise<void>(async (resolve, reject) => {
let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? '' // let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? ''
let binaryName = `cortex-server${process.platform === 'win32' ? '.exe' : ''}` let binaryName = `cortex-server${
process.platform === 'win32' ? '.exe' : ''
}`
const binPath = path.join(__dirname, '..', 'bin') const binPath = path.join(__dirname, '..', 'bin')
const executablePath = path.join(binPath, binaryName) const executablePath = path.join(binPath, binaryName)
addEnvPaths(binPath)
const sharedPath = path.join(appResourcePath(), 'shared') const sharedPath = path.join(appResourcePath(), 'shared')
// Execute the binary // Execute the binary
log(`[CORTEX]:: Spawn cortex at path: ${executablePath}`) log(`[CORTEX]:: Spawn cortex at path: ${executablePath}`)
@ -44,15 +49,17 @@ function run(systemInfo?: SystemInformation): Promise<any> {
`${path.join(dataFolderPath, '.janrc')}`, `${path.join(dataFolderPath, '.janrc')}`,
'--data_folder_path', '--data_folder_path',
dataFolderPath, dataFolderPath,
'--loglevel',
'INFO',
], ],
{ {
env: { env: {
...process.env, ...process.env,
CUDA_VISIBLE_DEVICES: gpuVisibleDevices, // CUDA_VISIBLE_DEVICES: gpuVisibleDevices,
// Vulkan - Support 1 device at a time for now // // Vulkan - Support 1 device at a time for now
...(gpuVisibleDevices?.length > 0 && { // ...(gpuVisibleDevices?.length > 0 && {
GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices, // GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices,
}), // }),
}, },
cwd: sharedPath, cwd: sharedPath,
} }
@ -71,6 +78,22 @@ function dispose() {
watchdog?.terminate() watchdog?.terminate()
} }
/**
* Set the environment paths for the cortex subprocess
* @param dest
*/
function addEnvPaths(dest: string) {
// Add engine path to the PATH and LD_LIBRARY_PATH
if (process.platform === 'win32') {
process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
} else {
process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
path.delimiter,
dest
)
}
}
/** /**
* Cortex process info * Cortex process info
*/ */
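
A brief note on the `addEnvPaths` helper added above: extending `PATH` on Windows or `LD_LIBRARY_PATH` on Linux before spawning `cortex-server` lets the child process resolve the engine's shared libraries sitting next to the binary, without a system-wide install. A minimal sketch of the same idea (the paths are assumptions for illustration):

```typescript
import path from 'path'
import { spawn } from 'child_process'

// Append a library directory to the platform-appropriate search-path variable,
// then spawn a child process that inherits the extended environment.
function spawnWithLocalLibs(executable: string, libDir: string) {
  const key = process.platform === 'win32' ? 'PATH' : 'LD_LIBRARY_PATH'
  const env = {
    ...process.env,
    [key]: [process.env[key], libDir].filter(Boolean).join(path.delimiter),
  }
  return spawn(executable, [], { env })
}

// Hypothetical paths, for illustration only.
spawnWithLocalLibs('/opt/jan/bin/cortex-server', '/opt/jan/bin')
```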

View File

@ -14,8 +14,6 @@ import {
} from '@janhq/core' } from '@janhq/core'
import { CortexAPI } from './cortex' import { CortexAPI } from './cortex'
import { scanModelsFolder } from './legacy/model-json' import { scanModelsFolder } from './legacy/model-json'
import { downloadModel } from './legacy/download'
import { systemInformation } from '@janhq/core'
import { deleteModelFiles } from './legacy/delete' import { deleteModelFiles } from './legacy/delete'
export enum Settings { export enum Settings {
@ -70,18 +68,6 @@ export default class JanModelExtension extends ModelExtension {
* @returns A Promise that resolves when the model is downloaded. * @returns A Promise that resolves when the model is downloaded.
*/ */
async pullModel(model: string, id?: string, name?: string): Promise<void> { async pullModel(model: string, id?: string, name?: string): Promise<void> {
if (id) {
const model: Model = ModelManager.instance().get(id)
// Clip vision model - should not be handled by cortex.cpp
// TensorRT model - should not be handled by cortex.cpp
if (
model &&
(model.engine === InferenceEngine.nitro_tensorrt_llm ||
model.settings.vision_model)
) {
return downloadModel(model, (await systemInformation()).gpuSetting)
}
}
/** /**
* Sending POST to /models/pull/{id} endpoint to pull the model * Sending POST to /models/pull/{id} endpoint to pull the model
*/ */

View File

@ -2,15 +2,12 @@ import {
downloadFile, downloadFile,
DownloadRequest, DownloadRequest,
fs, fs,
GpuSetting,
InferenceEngine,
joinPath, joinPath,
Model, Model,
} from '@janhq/core' } from '@janhq/core'
export const downloadModel = async ( export const downloadModel = async (
model: Model, model: Model,
gpuSettings?: GpuSetting,
network?: { ignoreSSL?: boolean; proxy?: string } network?: { ignoreSSL?: boolean; proxy?: string }
): Promise<void> => { ): Promise<void> => {
const homedir = 'file://models' const homedir = 'file://models'
@ -27,41 +24,6 @@ export const downloadModel = async (
JSON.stringify(model, null, 2) JSON.stringify(model, null, 2)
) )
if (model.engine === InferenceEngine.nitro_tensorrt_llm) {
if (!gpuSettings || gpuSettings.gpus.length === 0) {
console.error('No GPU found. Please check your GPU setting.')
return
}
const firstGpu = gpuSettings.gpus[0]
if (!firstGpu.name.toLowerCase().includes('nvidia')) {
console.error('No Nvidia GPU found. Please check your GPU setting.')
return
}
const gpuArch = firstGpu.arch
if (gpuArch === undefined) {
console.error('No GPU architecture found. Please check your GPU setting.')
return
}
if (!supportedGpuArch.includes(gpuArch)) {
console.debug(
`Your GPU: ${JSON.stringify(firstGpu)} is not supported. Only 30xx, 40xx series are supported.`
)
return
}
const os = 'windows' // TODO: remove this hard coded value
const newSources = model.sources.map((source) => {
const newSource = { ...source }
newSource.url = newSource.url
.replace(/<os>/g, os)
.replace(/<gpuarch>/g, gpuArch)
return newSource
})
model.sources = newSources
}
console.debug(`Download sources: ${JSON.stringify(model.sources)}`) console.debug(`Download sources: ${JSON.stringify(model.sources)}`)
if (model.sources.length > 1) { if (model.sources.length > 1) {

View File

@ -1,75 +0,0 @@
# Create a Jan Extension using Typescript
Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
## Create Your Own Extension
To create your own extension, you can use this repository as a template! Just follow the below instructions:
1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository
## Initial Setup
After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!
1. :hammer_and_wrench: Install the dependencies
```bash
npm install
```
1. :building_construction: Package the TypeScript for distribution
```bash
npm run bundle
```
1. :white_check_mark: Check your artifact
There will be a tgz file in your extension directory now
## Update the Extension Metadata
The [`package.json`](package.json) file defines metadata about your extension, such as
extension name, main entry, description and version.
When you copy this repository, update `package.json` with the name, description for your extension.
## Update the Extension Code
The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.
There are a few things to keep in mind when writing your extension code:
- Most Jan Extension functions are processed asynchronously.
In `index.ts`, you will see that the extension function will return a `Promise<any>`.
```typescript
import { events, MessageEvent, MessageRequest } from '@janhq/core'
function onStart(): Promise<any> {
return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.inference(data)
)
}
```
For more information about the Jan Extension Core module, see the
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).
So, what are you waiting for? Go ahead and start customizing your extension!

View File

@ -1,2 +0,0 @@
@echo off
.\node_modules\.bin\download https://catalog.jan.ai/vulkaninfoSDK.exe -o ./bin

View File

@ -1,49 +0,0 @@
{
"name": "@janhq/monitoring-extension",
"productName": "System Monitoring",
"version": "1.0.10",
"description": "Provides system health and OS level data.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"scripts": {
"build": "rolldown -c rolldown.config.mjs && yarn download-artifacts",
"download-artifacts": "run-script-os && cpx \"bin/**\" \"dist/bin\"",
"download-artifacts:darwin": "echo 'No artifacts to download for darwin'",
"download-artifacts:win32": "download.bat",
"download-artifacts:linux": "download https://catalog.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo",
"build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/node/index.cjs.js"
},
"devDependencies": {
"@types/node": "^20.11.4",
"@types/node-os-utils": "^1.3.4",
"cpx": "^1.5.0",
"download-cli": "^1.1.1",
"rimraf": "^3.0.2",
"rolldown": "1.0.0-beta.1",
"run-script-os": "^1.1.6",
"typescript": "^5.3.3"
},
"dependencies": {
"@janhq/core": "../../core/package.tgz",
"node-os-utils": "^1.3.7"
},
"files": [
"dist/*",
"package.json",
"README.md"
],
"bundleDependencies": [
"node-os-utils",
"@janhq/core"
],
"installConfig": {
"hoistingLimits": "workspaces"
},
"packageManager": "yarn@4.5.3"
}

View File

@ -1,22 +0,0 @@
[
{
"key": "log-enabled",
"title": "Enable App Logs",
"description": "Saves app logs locally on your computer. This enables you to send us crash reports.",
"controllerType": "checkbox",
"controllerProps": {
"value": true
}
},
{
"key": "log-cleaning-interval",
"title": "Log Cleaning Interval",
"description": "Automatically delete local logs after a certain time interval (in milliseconds).",
"controllerType": "input",
"controllerProps": {
"value": "120000",
"placeholder": "Interval in milliseconds. E.g. 120000",
"textAlign": "right"
}
}
]

View File

@ -1,32 +0,0 @@
import { defineConfig } from 'rolldown'
import packageJson from './package.json' with { type: 'json' }
import settingJson from './resources/settings.json' with { type: 'json' }
export default defineConfig([
{
input: 'src/index.ts',
output: {
format: 'esm',
file: 'dist/index.js',
},
platform: 'browser',
define: {
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
SETTINGS: JSON.stringify(settingJson),
},
},
{
input: 'src/node/index.ts',
external: ['@janhq/core/node'],
output: {
format: 'cjs',
file: 'dist/node/index.cjs.js',
sourcemap: false,
inlineDynamicImports: true,
},
resolve: {
extensions: ['.js', '.ts', '.json'],
},
platform: 'node',
},
])

View File

@ -1,19 +0,0 @@
declare const NODE: string
declare const SETTINGS: SettingComponentProps[]
type CpuGpuInfo = {
cpu: {
usage: number
}
gpu: GpuInfo[]
}
type GpuInfo = {
id: string
name: string
temperature: string
utilization: string
memoryTotal: string
memoryFree: string
memoryUtilization: string
}

View File

@ -1,90 +0,0 @@
import {
AppConfigurationEventName,
GpuSetting,
MonitoringExtension,
OperatingSystemInfo,
events,
executeOnMain,
} from '@janhq/core'
enum Settings {
logEnabled = 'log-enabled',
logCleaningInterval = 'log-cleaning-interval',
}
/**
* JanMonitoringExtension is a extension that provides system monitoring functionality.
* It implements the MonitoringExtension interface from the @janhq/core package.
*/
export default class JanMonitoringExtension extends MonitoringExtension {
/**
* Called when the extension is loaded.
*/
async onLoad() {
// Register extension settings
this.registerSettings(SETTINGS)
const logEnabled = await this.getSetting<boolean>(Settings.logEnabled, true)
const logCleaningInterval = parseInt(
await this.getSetting<string>(Settings.logCleaningInterval, '120000')
)
// Register File Logger provided by this extension
await executeOnMain(NODE, 'registerLogger', {
logEnabled,
logCleaningInterval: isNaN(logCleaningInterval)
? 120000
: logCleaningInterval,
})
// Attempt to fetch nvidia info
await executeOnMain(NODE, 'updateNvidiaInfo')
events.emit(AppConfigurationEventName.OnConfigurationUpdate, {})
}
onSettingUpdate<T>(key: string, value: T): void {
if (key === Settings.logEnabled) {
executeOnMain(NODE, 'updateLogger', { logEnabled: value })
} else if (key === Settings.logCleaningInterval) {
executeOnMain(NODE, 'updateLogger', { logCleaningInterval: value })
}
}
/**
* Called when the extension is unloaded.
*/
onUnload(): void {
// Register File Logger provided by this extension
executeOnMain(NODE, 'unregisterLogger')
}
/**
* Returns the GPU configuration.
* @returns A Promise that resolves to an object containing the GPU configuration.
*/
async getGpuSetting(): Promise<GpuSetting | undefined> {
return executeOnMain(NODE, 'getGpuConfig')
}
/**
* Returns information about the system resources.
* @returns A Promise that resolves to an object containing information about the system resources.
*/
getResourcesInfo(): Promise<any> {
return executeOnMain(NODE, 'getResourcesInfo')
}
/**
* Returns information about the current system load.
* @returns A Promise that resolves to an object containing information about the current system load.
*/
getCurrentLoad(): Promise<any> {
return executeOnMain(NODE, 'getCurrentLoad')
}
/**
* Returns information about the OS
* @returns
*/
getOsInfo(): Promise<OperatingSystemInfo> {
return executeOnMain(NODE, 'getOsInfo')
}
}

View File

@ -1,389 +0,0 @@
import {
GpuSetting,
GpuSettingInfo,
LoggerManager,
OperatingSystemInfo,
ResourceInfo,
SupportedPlatforms,
getJanDataFolderPath,
log,
} from '@janhq/core/node'
import { mem, cpu } from 'node-os-utils'
import { exec } from 'child_process'
import { writeFileSync, existsSync, readFileSync, mkdirSync } from 'fs'
import path from 'path'
import os from 'os'
import { FileLogger } from './logger'
/**
* Path to the settings directory
**/
export const SETTINGS_DIR = path.join(getJanDataFolderPath(), 'settings')
/**
* Path to the settings file
**/
export const GPU_INFO_FILE = path.join(SETTINGS_DIR, 'settings.json')
/**
* Default GPU settings
* TODO: This needs to be refactored to support multiple accelerators
**/
const DEFAULT_SETTINGS: GpuSetting = {
notify: true,
run_mode: 'cpu',
nvidia_driver: {
exist: false,
version: '',
},
cuda: {
exist: false,
version: '',
},
gpus: [],
gpu_highest_vram: '',
gpus_in_use: [],
is_initial: true,
// TODO: This needs to be set based on user toggle in settings
vulkan: false,
}
export const getGpuConfig = async (): Promise<GpuSetting | undefined> => {
if (process.platform === 'darwin') return undefined
if (existsSync(GPU_INFO_FILE))
return JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
return DEFAULT_SETTINGS
}
export const getResourcesInfo = async (): Promise<ResourceInfo> => {
const ramUsedInfo = await mem.used()
const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024
const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024
const resourceInfo: ResourceInfo = {
mem: {
totalMemory,
usedMemory,
},
}
return resourceInfo
}
export const getCurrentLoad = () =>
new Promise<CpuGpuInfo>(async (resolve, reject) => {
const cpuPercentage = await cpu.usage()
let data = {
run_mode: 'cpu',
gpus_in_use: [],
}
if (process.platform !== 'darwin') {
data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
}
if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
const gpuIds = data.gpus_in_use.join(',')
if (gpuIds !== '' && data['vulkan'] !== true) {
exec(
`nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
(error, stdout, _) => {
if (error) {
console.error(`exec error: ${error}`)
throw new Error(error.message)
}
const gpuInfo: GpuInfo[] = stdout
.trim()
.split('\n')
.map((line) => {
const [
id,
name,
temperature,
utilization,
memoryTotal,
memoryFree,
memoryUtilization,
] = line.split(', ').map((item) => item.replace(/\r/g, ''))
return {
id,
name,
temperature,
utilization,
memoryTotal,
memoryFree,
memoryUtilization,
}
})
resolve({
cpu: { usage: cpuPercentage },
gpu: gpuInfo,
})
}
)
} else {
// Handle the case where gpuIds is empty
resolve({
cpu: { usage: cpuPercentage },
gpu: [],
})
}
} else {
// Handle the case where run_mode is not 'gpu' or no GPUs are in use
resolve({
cpu: { usage: cpuPercentage },
gpu: [],
})
}
})
/**
* This will retrieve GPU information and persist settings.json
* Will be called when the extension is loaded to turn on GPU acceleration if supported
*/
export const updateNvidiaInfo = async () => {
// ignore if macos
if (process.platform === 'darwin') return
try {
JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
} catch (error) {
if (!existsSync(SETTINGS_DIR)) {
mkdirSync(SETTINGS_DIR, {
recursive: true,
})
}
writeFileSync(GPU_INFO_FILE, JSON.stringify(DEFAULT_SETTINGS, null, 2))
}
await updateNvidiaDriverInfo()
await updateGpuInfo()
}
const updateNvidiaDriverInfo = async () =>
new Promise((resolve, reject) => {
exec(
'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
(error, stdout) => {
const data: GpuSetting = JSON.parse(
readFileSync(GPU_INFO_FILE, 'utf-8')
)
if (!error) {
const firstLine = stdout.split('\n')[0].trim()
data.nvidia_driver.exist = true
data.nvidia_driver.version = firstLine
} else {
data.nvidia_driver.exist = false
}
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
resolve({})
}
)
})
const getGpuArch = (gpuName: string): string => {
if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown'
if (gpuName.includes('30')) return 'ampere'
else if (gpuName.includes('40')) return 'ada'
else return 'unknown'
}
const updateGpuInfo = async () =>
new Promise((resolve, reject) => {
let data: GpuSetting = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
// Cuda
if (data.vulkan === true) {
// Vulkan
exec(
process.platform === 'win32'
? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
: `${__dirname}/../bin/vulkaninfo --summary`,
async (error, stdout) => {
if (!error) {
const output = stdout.toString()
log(output)
const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g
const gpus: GpuSettingInfo[] = []
let match
while ((match = gpuRegex.exec(output)) !== null) {
const id = match[1]
const name = match[2]
const arch = getGpuArch(name)
gpus.push({ id, vram: '0', name, arch })
}
data.gpus = gpus
if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
}
data = await updateCudaExistence(data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
log(`[APP]::${JSON.stringify(data)}`)
resolve({})
} else {
reject(error)
}
}
)
} else {
exec(
'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
async (error, stdout) => {
if (!error) {
log(`[SPECS]::${stdout}`)
// Get GPU info and gpu has higher memory first
let highestVram = 0
let highestVramId = '0'
const gpus: GpuSettingInfo[] = stdout
.trim()
.split('\n')
.map((line) => {
let [id, vram, name] = line.split(', ')
const arch = getGpuArch(name)
vram = vram.replace(/\r/g, '')
if (parseFloat(vram) > highestVram) {
highestVram = parseFloat(vram)
highestVramId = id
}
return { id, vram, name, arch }
})
data.gpus = gpus
data.gpu_highest_vram = highestVramId
} else {
data.gpus = []
data.gpu_highest_vram = undefined
}
if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
data.gpus_in_use = data.gpu_highest_vram ? [data.gpu_highest_vram].filter(e => !!e) : []
}
data = await updateCudaExistence(data)
console.log('[MONITORING]::Cuda info: ', data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
log(`[APP]::${JSON.stringify(data)}`)
resolve({})
}
)
}
})
/**
* Check if file exists in paths
*/
const checkFileExistenceInPaths = (file: string, paths: string[]): boolean => {
return paths.some((p) => existsSync(path.join(p, file)))
}
/**
* Validate cuda for linux and windows
*/
const updateCudaExistence = async (
data: GpuSetting = DEFAULT_SETTINGS
): Promise<GpuSetting> => {
let filesCuda12: string[]
let filesCuda11: string[]
let paths: string[]
let cudaVersion: string = ''
if (process.platform === 'win32') {
filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']
filesCuda11 = ['cublas64_11.dll', 'cudart64_110.dll', 'cublasLt64_11.dll']
paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []
} else {
filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']
filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']
paths = process.env.LD_LIBRARY_PATH
? process.env.LD_LIBRARY_PATH.split(path.delimiter)
: []
paths.push('/usr/lib/x86_64-linux-gnu/')
}
let cudaExists = filesCuda12.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
)
if (!cudaExists) {
cudaExists = filesCuda11.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
)
if (cudaExists) {
cudaVersion = '11'
}
} else {
cudaVersion = '12'
}
data.cuda.exist = cudaExists
data.cuda.version = cudaVersion
console.debug(data.is_initial, data.gpus_in_use)
if (cudaExists && data.is_initial && data.gpus_in_use.length > 0) {
data.run_mode = 'gpu'
}
data.is_initial = false
// Attempt to query CUDA using NVIDIA SMI
if (!cudaExists) {
await new Promise<void>((resolve) => {
exec('nvidia-smi', (error, stdout) => {
if (!error) {
const regex = /CUDA\s*Version:\s*(\d+\.\d+)/g
const match = regex.exec(stdout)
if (match && match[1]) {
data.cuda.version = match[1]
}
}
console.log('[MONITORING]::Finalized cuda info update: ', data)
resolve()
})
})
}
return data
}
export const getOsInfo = (): OperatingSystemInfo => {
const platform =
SupportedPlatforms.find((p) => p === process.platform) || 'unknown'
const osInfo: OperatingSystemInfo = {
platform: platform,
arch: process.arch,
release: os.release(),
machine: os.machine(),
version: os.version(),
totalMem: os.totalmem(),
freeMem: os.freemem(),
}
return osInfo
}
export const registerLogger = ({ logEnabled, logCleaningInterval }) => {
const logger = new FileLogger(logEnabled, logCleaningInterval)
LoggerManager.instance().register(logger)
logger.cleanLogs()
}
export const unregisterLogger = () => {
LoggerManager.instance().unregister('file')
}
export const updateLogger = ({ logEnabled, logCleaningInterval }) => {
const logger = LoggerManager.instance().loggers.get('file') as FileLogger
if (logger && logEnabled !== undefined) logger.logEnabled = logEnabled
if (logger && logCleaningInterval)
logger.logCleaningInterval = logCleaningInterval
// Rerun
logger && logger.cleanLogs()
}

Some files were not shown because too many files have changed in this diff.