Merge pull request #4683 from janhq/chore/sync-release-to-dev

chore: sync release v0.5.15 branch into dev branch
This commit is contained in:
Louis 2025-02-18 18:40:03 +07:00 committed by GitHub
commit c4d7a143eb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
150 changed files with 3400 additions and 2643 deletions

View File

@ -9,31 +9,6 @@ jobs:
get-update-version: get-update-version:
uses: ./.github/workflows/template-get-update-version.yml uses: ./.github/workflows/template-get-update-version.yml
create-draft-release:
runs-on: ubuntu-latest
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
outputs:
upload_url: ${{ steps.create_release.outputs.upload_url }}
version: ${{ steps.get_version.outputs.version }}
permissions:
contents: write
steps:
- name: Extract tag name without v prefix
id: get_version
run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV && echo "::set-output name=version::${GITHUB_REF#refs/tags/v}"
env:
GITHUB_REF: ${{ github.ref }}
- name: Create Draft Release
id: create_release
uses: softprops/action-gh-release@v2
with:
tag_name: ${{ github.ref_name }}
token: ${{ secrets.GITHUB_TOKEN }}
name: "${{ env.VERSION }}"
draft: true
prerelease: false
generate_release_notes: true
build-macos: build-macos:
uses: ./.github/workflows/template-build-macos.yml uses: ./.github/workflows/template-build-macos.yml
secrets: inherit secrets: inherit
@ -65,7 +40,7 @@ jobs:
beta: true beta: true
sync-temp-to-latest: sync-temp-to-latest:
needs: [build-macos, create-draft-release, build-windows-x64, build-linux-x64]
needs: [build-macos, build-windows-x64, build-linux-x64]
runs-on: ubuntu-latest runs-on: ubuntu-latest
permissions: permissions:
contents: write contents: write
@ -82,19 +57,15 @@ jobs:
AWS_DEFAULT_REGION: ${{ secrets.DELTA_AWS_REGION }} AWS_DEFAULT_REGION: ${{ secrets.DELTA_AWS_REGION }}
AWS_EC2_METADATA_DISABLED: "true" AWS_EC2_METADATA_DISABLED: "true"
- name: set release to prerelease
run: |
gh release edit v${{ needs.create-draft-release.outputs.version }} --draft=false --prerelease
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
noti-discord-and-update-url-readme: noti-discord-and-update-url-readme:
needs: [build-macos, create-draft-release, build-windows-x64, build-linux-x64, sync-temp-to-latest]
needs: [build-macos, get-update-version, build-windows-x64, build-linux-x64, sync-temp-to-latest]
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Set version to environment variable - name: Set version to environment variable
run: | run: |
echo "VERSION=${{ needs.create-draft-release.outputs.version }}" >> $GITHUB_ENV VERSION=${{ needs.get-update-version.outputs.new_version }}
VERSION="${VERSION#v}"
echo "VERSION=$VERSION" >> $GITHUB_ENV
- name: Notify Discord - name: Notify Discord
uses: Ilshidur/action-discord@master uses: Ilshidur/action-discord@master
@ -105,6 +76,5 @@ jobs:
- macOS Universal: https://delta.jan.ai/beta/jan-beta-mac-universal-{{ VERSION }}.dmg - macOS Universal: https://delta.jan.ai/beta/jan-beta-mac-universal-{{ VERSION }}.dmg
- Linux Deb: https://delta.jan.ai/beta/jan-beta-linux-amd64-{{ VERSION }}.deb - Linux Deb: https://delta.jan.ai/beta/jan-beta-linux-amd64-{{ VERSION }}.deb
- Linux AppImage: https://delta.jan.ai/beta/jan-beta-linux-x86_64-{{ VERSION }}.AppImage - Linux AppImage: https://delta.jan.ai/beta/jan-beta-linux-x86_64-{{ VERSION }}.AppImage
- Github Release URL: https://github.com/janhq/jan/releases/tag/v{{ VERSION }}
env: env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_JAN_BETA }} DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_JAN_BETA }}

View File

@ -1,39 +0,0 @@
name: build-jan-server
on:
workflow_call:
inputs:
dockerfile_path:
required: false
type: string
default: './Dockerfile'
docker_image_tag:
required: true
type: string
default: 'ghcr.io/janhq/jan-server:dev-latest'
jobs:
build:
runs-on: ubuntu-latest
env:
REGISTRY: ghcr.io
IMAGE_NAME: janhq/jan-server
permissions:
packages: write
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Log in to the Container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push Docker image
uses: docker/build-push-action@v3
with:
context: .
file: ${{ inputs.dockerfile_path }}
push: true
tags: ${{ inputs.docker_image_tag }}

View File

@ -83,7 +83,7 @@ jobs:
cat ./electron/package.json cat ./electron/package.json
echo "------------------------" echo "------------------------"
cat ./package.json cat ./package.json
jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
mv /tmp/package.json electron/package.json mv /tmp/package.json electron/package.json
cat electron/package.json cat electron/package.json

View File

@ -99,7 +99,7 @@ jobs:
cat ./electron/package.json cat ./electron/package.json
echo "------------------------" echo "------------------------"
cat ./package.json cat ./package.json
jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
mv /tmp/package.json electron/package.json mv /tmp/package.json electron/package.json
cat electron/package.json cat electron/package.json

View File

@ -108,7 +108,7 @@ jobs:
cat ./package.json cat ./package.json
echo "------------------------" echo "------------------------"
cat ./electron/scripts/uninstaller.nsh cat ./electron/scripts/uninstaller.nsh
jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
mv /tmp/package.json electron/package.json mv /tmp/package.json electron/package.json
cat electron/package.json cat electron/package.json

View File

@ -12,6 +12,7 @@ export enum ExtensionTypeEnum {
SystemMonitoring = 'systemMonitoring', SystemMonitoring = 'systemMonitoring',
HuggingFace = 'huggingFace', HuggingFace = 'huggingFace',
Engine = 'engine', Engine = 'engine',
Hardware = 'hardware',
} }
export interface ExtensionType { export interface ExtensionType {

View File

@ -38,8 +38,14 @@ describe('OAIEngine', () => {
it('should subscribe to events on load', () => { it('should subscribe to events on load', () => {
engine.onLoad() engine.onLoad()
expect(events.on).toHaveBeenCalledWith(MessageEvent.OnMessageSent, expect.any(Function)) expect(events.on).toHaveBeenCalledWith(
expect(events.on).toHaveBeenCalledWith(InferenceEvent.OnInferenceStopped, expect.any(Function)) MessageEvent.OnMessageSent,
expect.any(Function)
)
expect(events.on).toHaveBeenCalledWith(
InferenceEvent.OnInferenceStopped,
expect.any(Function)
)
}) })
it('should handle inference request', async () => { it('should handle inference request', async () => {
@ -77,7 +83,12 @@ describe('OAIEngine', () => {
expect(events.emit).toHaveBeenCalledWith( expect(events.emit).toHaveBeenCalledWith(
MessageEvent.OnMessageUpdate, MessageEvent.OnMessageUpdate,
expect.objectContaining({ expect.objectContaining({
content: [{ type: ContentType.Text, text: { value: 'test response', annotations: [] } }], content: [
{
type: ContentType.Text,
text: { value: 'test response', annotations: [] },
},
],
status: MessageStatus.Ready, status: MessageStatus.Ready,
}) })
) )
@ -101,11 +112,10 @@ describe('OAIEngine', () => {
await engine.inference(data) await engine.inference(data)
expect(events.emit).toHaveBeenCalledWith(
expect(events.emit).toHaveBeenLastCalledWith(
MessageEvent.OnMessageUpdate, MessageEvent.OnMessageUpdate,
expect.objectContaining({ expect.objectContaining({
content: [{ type: ContentType.Text, text: { value: 'test error', annotations: [] } }],
status: 'error',
status: MessageStatus.Error,
error_code: 500, error_code: 500,
}) })
) )

View File

@ -42,7 +42,9 @@ export abstract class OAIEngine extends AIEngine {
*/ */
override onLoad() { override onLoad() {
super.onLoad() super.onLoad()
events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => this.inference(data)) events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.inference(data)
)
events.on(InferenceEvent.OnInferenceStopped, () => this.stopInference()) events.on(InferenceEvent.OnInferenceStopped, () => this.stopInference())
} }
@ -128,7 +130,9 @@ export abstract class OAIEngine extends AIEngine {
events.emit(MessageEvent.OnMessageUpdate, message) events.emit(MessageEvent.OnMessageUpdate, message)
}, },
complete: async () => { complete: async () => {
message.status = message.content.length ? MessageStatus.Ready : MessageStatus.Error message.status = message.content.length
? MessageStatus.Ready
: MessageStatus.Error
events.emit(MessageEvent.OnMessageUpdate, message) events.emit(MessageEvent.OnMessageUpdate, message)
}, },
error: async (err: any) => { error: async (err: any) => {
@ -141,7 +145,10 @@ export abstract class OAIEngine extends AIEngine {
message.content[0] = { message.content[0] = {
type: ContentType.Text, type: ContentType.Text,
text: { text: {
value: err.message,
value:
typeof message === 'string'
? err.message
: (JSON.stringify(err.message) ?? err.detail),
annotations: [], annotations: [],
}, },
} }

View File

@ -1,14 +1,17 @@
import { lastValueFrom, Observable } from 'rxjs' import { lastValueFrom, Observable } from 'rxjs'
import { requestInference } from './sse' import { requestInference } from './sse'
import { ReadableStream } from 'stream/web'; import { ReadableStream } from 'stream/web'
describe('requestInference', () => { describe('requestInference', () => {
it('should send a request to the inference server and return an Observable', () => { it('should send a request to the inference server and return an Observable', () => {
// Mock the fetch function // Mock the fetch function
const mockFetch: any = jest.fn(() => const mockFetch: any = jest.fn(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
json: () => Promise.resolve({ choices: [{ message: { content: 'Generated response' } }] }), json: () =>
Promise.resolve({
choices: [{ message: { content: 'Generated response' } }],
}),
headers: new Headers(), headers: new Headers(),
redirected: false, redirected: false,
status: 200, status: 200,
@ -36,7 +39,10 @@ describe('requestInference', () => {
const mockFetch: any = jest.fn(() => const mockFetch: any = jest.fn(() =>
Promise.resolve({ Promise.resolve({
ok: false, ok: false,
json: () => Promise.resolve({ error: { message: 'Wrong API Key', code: 'invalid_api_key' } }), json: () =>
Promise.resolve({
error: { message: 'Invalid API Key.', code: 'invalid_api_key' },
}),
headers: new Headers(), headers: new Headers(),
redirected: false, redirected: false,
status: 401, status: 401,
@ -56,16 +62,22 @@ describe('requestInference', () => {
// Assert the expected behavior // Assert the expected behavior
expect(result).toBeInstanceOf(Observable) expect(result).toBeInstanceOf(Observable)
expect(lastValueFrom(result)).rejects.toEqual({ message: 'Wrong API Key', code: 'invalid_api_key' }) expect(lastValueFrom(result)).rejects.toEqual({
message: 'Invalid API Key.',
code: 'invalid_api_key',
})
}) })
}) })
it('should handle a successful response with a transformResponse function', () => { it('should handle a successful response with a transformResponse function', () => {
// Mock the fetch function // Mock the fetch function
const mockFetch: any = jest.fn(() => const mockFetch: any = jest.fn(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
json: () => Promise.resolve({ choices: [{ message: { content: 'Generated response' } }] }), json: () =>
Promise.resolve({
choices: [{ message: { content: 'Generated response' } }],
}),
headers: new Headers(), headers: new Headers(),
redirected: false, redirected: false,
status: 200, status: 200,
@ -78,47 +90,57 @@ describe('requestInference', () => {
const inferenceUrl = 'https://inference-server.com' const inferenceUrl = 'https://inference-server.com'
const requestBody = { message: 'Hello' } const requestBody = { message: 'Hello' }
const model = { id: 'model-id', parameters: { stream: false } } const model = { id: 'model-id', parameters: { stream: false } }
const transformResponse = (data: any) => data.choices[0].message.content.toUpperCase() const transformResponse = (data: any) =>
data.choices[0].message.content.toUpperCase()
// Call the function // Call the function
const result = requestInference(inferenceUrl, requestBody, model, undefined, undefined, transformResponse) const result = requestInference(
inferenceUrl,
requestBody,
model,
undefined,
undefined,
transformResponse
)
// Assert the expected behavior // Assert the expected behavior
expect(result).toBeInstanceOf(Observable) expect(result).toBeInstanceOf(Observable)
expect(lastValueFrom(result)).resolves.toEqual('GENERATED RESPONSE') expect(lastValueFrom(result)).resolves.toEqual('GENERATED RESPONSE')
}) })
it('should handle a successful response with streaming enabled', () => {
it('should handle a successful response with streaming enabled', () => {
// Mock the fetch function // Mock the fetch function
const mockFetch: any = jest.fn(() => const mockFetch: any = jest.fn(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
body: new ReadableStream({ body: new ReadableStream({
start(controller) { start(controller) {
controller.enqueue(new TextEncoder().encode('data: {"choices": [{"delta": {"content": "Streamed"}}]}')); controller.enqueue(
controller.enqueue(new TextEncoder().encode('data: [DONE]')); new TextEncoder().encode(
controller.close(); 'data: {"choices": [{"delta": {"content": "Streamed"}}]}'
} )
)
controller.enqueue(new TextEncoder().encode('data: [DONE]'))
controller.close()
},
}), }),
headers: new Headers(), headers: new Headers(),
redirected: false, redirected: false,
status: 200, status: 200,
statusText: 'OK', statusText: 'OK',
}) })
); )
jest.spyOn(global, 'fetch').mockImplementation(mockFetch); jest.spyOn(global, 'fetch').mockImplementation(mockFetch)
// Define the test inputs // Define the test inputs
const inferenceUrl = 'https://inference-server.com'; const inferenceUrl = 'https://inference-server.com'
const requestBody = { message: 'Hello' }; const requestBody = { message: 'Hello' }
const model = { id: 'model-id', parameters: { stream: true } }; const model = { id: 'model-id', parameters: { stream: true } }
// Call the function // Call the function
const result = requestInference(inferenceUrl, requestBody, model); const result = requestInference(inferenceUrl, requestBody, model)
// Assert the expected behavior // Assert the expected behavior
expect(result).toBeInstanceOf(Observable); expect(result).toBeInstanceOf(Observable)
expect(lastValueFrom(result)).resolves.toEqual('Streamed'); expect(lastValueFrom(result)).resolves.toEqual('Streamed')
}); })

View File

@ -32,21 +32,20 @@ export function requestInference(
}) })
.then(async (response) => { .then(async (response) => {
if (!response.ok) { if (!response.ok) {
const data = await response.json()
let errorCode = ErrorCode.Unknown
if (data.error) {
errorCode = data.error.code ?? data.error.type ?? ErrorCode.Unknown
} else if (response.status === 401) {
errorCode = ErrorCode.InvalidApiKey
}
const error = {
message: data.error?.message ?? data.message ?? 'Error occurred.',
code: errorCode,
}
subscriber.error(error)
subscriber.complete()
if (response.status === 401) {
throw {
code: ErrorCode.InvalidApiKey,
message: 'Invalid API Key.',
}
}
let data = await response.json()
try {
handleError(data)
} catch (err) {
subscriber.error(err)
return
} }
}
// There could be overriden stream parameter in the model // There could be overriden stream parameter in the model
// that is set in request body (transformed payload) // that is set in request body (transformed payload)
if ( if (
@ -54,9 +53,10 @@ export function requestInference(
model.parameters?.stream === false model.parameters?.stream === false
) { ) {
const data = await response.json() const data = await response.json()
if (data.error || data.message) {
subscriber.error(data.error ?? data)
subscriber.complete()
try {
handleError(data)
} catch (err) {
subscriber.error(err)
return
} }
if (transformResponse) { if (transformResponse) {
@ -91,13 +91,10 @@ export function requestInference(
const toParse = cachedLines + line const toParse = cachedLines + line
if (!line.includes('data: [DONE]')) { if (!line.includes('data: [DONE]')) {
const data = JSON.parse(toParse.replace('data: ', '')) const data = JSON.parse(toParse.replace('data: ', ''))
if (
'error' in data ||
'message' in data ||
'detail' in data
) {
subscriber.error(data.error ?? data)
subscriber.complete()
try {
handleError(data)
} catch (err) {
subscriber.error(err)
return return
} }
content += data.choices[0]?.delta?.content ?? '' content += data.choices[0]?.delta?.content ?? ''
@ -118,3 +115,18 @@ export function requestInference(
.catch((err) => subscriber.error(err)) .catch((err) => subscriber.error(err))
}) })
} }
/**
* Handle error and normalize it to a common format.
* @param data
*/
const handleError = (data: any) => {
if (
data.error ||
data.message ||
data.detail ||
(Array.isArray(data) && data.length && data[0].error)
) {
throw data.error ?? data[0]?.error ?? data
}
}
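For reference (not part of this commit), a minimal sketch of the payload shapes the new handleError helper is written to normalize; the example payloads below are illustrative only.

// Illustrative only: the same guard conditions as the handleError helper above,
// exercised against the payload shapes it checks for.
const normalizeError = (data: any) => {
  if (
    data.error ||
    data.message ||
    data.detail ||
    (Array.isArray(data) && data.length && data[0].error)
  ) {
    throw data.error ?? data[0]?.error ?? data
  }
}

try {
  // Object-style error body: the nested error object is thrown as-is.
  normalizeError({ error: { message: 'Invalid API Key.', code: 'invalid_api_key' } })
} catch (e) {
  console.log(e) // { message: 'Invalid API Key.', code: 'invalid_api_key' }
}

try {
  // Array-wrapped error body: the first element's error is thrown.
  normalizeError([{ error: { message: 'Rate limited.' } }])
} catch (e) {
  console.log(e) // { message: 'Rate limited.' }
}

// A regular chat completion chunk passes through without throwing.
normalizeError({ choices: [{ delta: { content: 'hello' } }] })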

View File

@ -0,0 +1,26 @@
import { HardwareInformation } from '../../types'
import { BaseExtension, ExtensionTypeEnum } from '../extension'
/**
* Hardware management extension. Provides system hardware information and GPU activation control.
* @abstract
* @extends BaseExtension
*/
export abstract class HardwareManagementExtension extends BaseExtension {
type(): ExtensionTypeEnum | undefined {
return ExtensionTypeEnum.Hardware
}
/**
* @returns A Promise that resolves to the system hardware information.
*/
abstract getHardware(): Promise<HardwareInformation>
/**
* @returns A Promise that resolves to the result of setting the active GPUs.
*/
abstract setAvtiveGpu(data: { gpus: number[] }): Promise<{
message: string
activated_gpus: number[]
}>
}
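Not part of this commit: a minimal sketch of a unit test for the new extension type, mirroring the removed MonitoringExtension test pattern further down in this diff; the TestHardwareExtension class is hypothetical.

import { ExtensionTypeEnum } from '../extension'
import { HardwareInformation } from '../../types'
import { HardwareManagementExtension } from './hardwareManagement'

it('should have the correct type', () => {
  // Stub subclass used only to exercise the abstract class.
  class TestHardwareExtension extends HardwareManagementExtension {
    getHardware(): Promise<HardwareInformation> {
      throw new Error('Method not implemented.')
    }
    setAvtiveGpu(data: { gpus: number[] }): Promise<{
      message: string
      activated_gpus: number[]
    }> {
      throw new Error('Method not implemented.')
    }
  }
  const hardwareExtension = new TestHardwareExtension()
  expect(hardwareExtension.type()).toBe(ExtensionTypeEnum.Hardware)
})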

View File

@ -1,6 +1,5 @@
import { ConversationalExtension } from './index'; import { ConversationalExtension } from './index';
import { InferenceExtension } from './index'; import { InferenceExtension } from './index';
import { MonitoringExtension } from './index';
import { AssistantExtension } from './index'; import { AssistantExtension } from './index';
import { ModelExtension } from './index'; import { ModelExtension } from './index';
import * as Engines from './index'; import * as Engines from './index';
@ -14,10 +13,6 @@ describe('index.ts exports', () => {
expect(InferenceExtension).toBeDefined(); expect(InferenceExtension).toBeDefined();
}); });
test('should export MonitoringExtension', () => {
expect(MonitoringExtension).toBeDefined();
});
test('should export AssistantExtension', () => { test('should export AssistantExtension', () => {
expect(AssistantExtension).toBeDefined(); expect(AssistantExtension).toBeDefined();
}); });

View File

@ -9,10 +9,7 @@ export { ConversationalExtension } from './conversational'
*/ */
export { InferenceExtension } from './inference' export { InferenceExtension } from './inference'
/**
* Monitoring extension for system monitoring.
*/
export { MonitoringExtension } from './monitoring'
/** /**
* Assistant extension for managing assistants. * Assistant extension for managing assistants.
@ -33,3 +30,8 @@ export * from './engines'
* Engines Management * Engines Management
*/ */
export * from './enginesManagement' export * from './enginesManagement'
/**
* Hardware Management
*/
export * from './hardwareManagement'

View File

@ -1,42 +0,0 @@
import { ExtensionTypeEnum } from '../extension';
import { MonitoringExtension } from './monitoring';
it('should have the correct type', () => {
class TestMonitoringExtension extends MonitoringExtension {
getGpuSetting(): Promise<GpuSetting | undefined> {
throw new Error('Method not implemented.');
}
getResourcesInfo(): Promise<any> {
throw new Error('Method not implemented.');
}
getCurrentLoad(): Promise<any> {
throw new Error('Method not implemented.');
}
getOsInfo(): Promise<OperatingSystemInfo> {
throw new Error('Method not implemented.');
}
}
const monitoringExtension = new TestMonitoringExtension();
expect(monitoringExtension.type()).toBe(ExtensionTypeEnum.SystemMonitoring);
});
it('should create an instance of MonitoringExtension', () => {
class TestMonitoringExtension extends MonitoringExtension {
getGpuSetting(): Promise<GpuSetting | undefined> {
throw new Error('Method not implemented.');
}
getResourcesInfo(): Promise<any> {
throw new Error('Method not implemented.');
}
getCurrentLoad(): Promise<any> {
throw new Error('Method not implemented.');
}
getOsInfo(): Promise<OperatingSystemInfo> {
throw new Error('Method not implemented.');
}
}
const monitoringExtension = new TestMonitoringExtension();
expect(monitoringExtension).toBeInstanceOf(MonitoringExtension);
});

View File

@ -1,20 +0,0 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension'
import { GpuSetting, MonitoringInterface, OperatingSystemInfo } from '../../types'
/**
* Monitoring extension for system monitoring.
* @extends BaseExtension
*/
export abstract class MonitoringExtension extends BaseExtension implements MonitoringInterface {
/**
* Monitoring extension type.
*/
type(): ExtensionTypeEnum | undefined {
return ExtensionTypeEnum.SystemMonitoring
}
abstract getGpuSetting(): Promise<GpuSetting | undefined>
abstract getResourcesInfo(): Promise<any>
abstract getCurrentLoad(): Promise<any>
abstract getOsInfo(): Promise<OperatingSystemInfo>
}

View File

@ -1,4 +1,5 @@
export type AppConfiguration = { export type AppConfiguration = {
data_folder: string data_folder: string
quick_ask: boolean quick_ask: boolean
distinct_id?: string
} }

View File

@ -18,6 +18,7 @@ export type EngineMetadata = {
template?: string template?: string
} }
} }
explore_models_url?: string
} }
export type EngineVariant = { export type EngineVariant = {

View File

@ -0,0 +1,55 @@
export type Cpu = {
arch: string
cores: number
instructions: string[]
model: string
usage: number
}
export type GpuAdditionalInformation = {
compute_cap: string
driver_version: string
}
export type Gpu = {
activated: boolean
additional_information?: GpuAdditionalInformation
free_vram: number
id: string
name: string
total_vram: number
uuid: string
version: string
}
export type Os = {
name: string
version: string
}
export type Power = {
battery_life: number
charging_status: string
is_power_saving: boolean
}
export type Ram = {
available: number
total: number
type: string
}
export type Storage = {
available: number
total: number
type: string
}
export type HardwareInformation = {
cpu: Cpu
gpus: Gpu[]
os: Os
power: Power
ram: Ram
storage: Storage
}

View File

@ -4,7 +4,6 @@ import * as model from './model';
import * as thread from './thread'; import * as thread from './thread';
import * as message from './message'; import * as message from './message';
import * as inference from './inference'; import * as inference from './inference';
import * as monitoring from './monitoring';
import * as file from './file'; import * as file from './file';
import * as config from './config'; import * as config from './config';
import * as huggingface from './huggingface'; import * as huggingface from './huggingface';
@ -18,7 +17,6 @@ import * as setting from './setting';
expect(thread).toBeDefined(); expect(thread).toBeDefined();
expect(message).toBeDefined(); expect(message).toBeDefined();
expect(inference).toBeDefined(); expect(inference).toBeDefined();
expect(monitoring).toBeDefined();
expect(file).toBeDefined(); expect(file).toBeDefined();
expect(config).toBeDefined(); expect(config).toBeDefined();
expect(huggingface).toBeDefined(); expect(huggingface).toBeDefined();

View File

@ -3,7 +3,6 @@ export * from './model'
export * from './thread' export * from './thread'
export * from './message' export * from './message'
export * from './inference' export * from './inference'
export * from './monitoring'
export * from './file' export * from './file'
export * from './config' export * from './config'
export * from './huggingface' export * from './huggingface'
@ -11,3 +10,4 @@ export * from './miscellaneous'
export * from './api' export * from './api'
export * from './setting' export * from './setting'
export * from './engine' export * from './engine'
export * from './hardware'

View File

@ -1,33 +1,25 @@
import { GpuAdditionalInformation } from '../hardware'
export type SystemResourceInfo = { export type SystemResourceInfo = {
memAvailable: number memAvailable: number
} }
export type RunMode = 'cpu' | 'gpu'
export type GpuSetting = { export type GpuSetting = {
notify: boolean
run_mode: RunMode
nvidia_driver: {
exist: boolean
version: string
}
cuda: {
exist: boolean
version: string
}
gpus: GpuSettingInfo[] gpus: GpuSettingInfo[]
gpu_highest_vram: string
gpus_in_use: string[]
is_initial: boolean
// TODO: This needs to be set based on user toggle in settings // TODO: This needs to be set based on user toggle in settings
vulkan: boolean vulkan: boolean
cpu?: any
} }
export type GpuSettingInfo = { export type GpuSettingInfo = {
activated: boolean
free_vram: number
id: string id: string
vram: string
name: string name: string
arch?: string total_vram: number
uuid: string
version: string
additional_information?: GpuAdditionalInformation
} }
export type SystemInformation = { export type SystemInformation = {
@ -42,9 +34,6 @@ export type SupportedPlatform = SupportedPlatformTuple[number]
export type OperatingSystemInfo = { export type OperatingSystemInfo = {
platform: SupportedPlatform | 'unknown' platform: SupportedPlatform | 'unknown'
arch: string arch: string
release: string
machine: string
version: string
totalMem: number totalMem: number
freeMem: number freeMem: number
} }

View File

@ -71,7 +71,7 @@ export type Model = {
/** /**
* The model identifier, modern version of id. * The model identifier, modern version of id.
*/ */
mode?: string
model?: string
/** /**
* Human-readable name that is used for UI. * Human-readable name that is used for UI.
@ -150,6 +150,7 @@ export type ModelSettingParams = {
*/ */
export type ModelRuntimeParams = { export type ModelRuntimeParams = {
temperature?: number temperature?: number
max_temperature?: number
token_limit?: number token_limit?: number
top_k?: number top_k?: number
top_p?: number top_p?: number

View File

@ -1,13 +0,0 @@
import * as monitoringInterface from './monitoringInterface'
import * as resourceInfo from './resourceInfo'
import * as index from './index'
it('should re-export all symbols from monitoringInterface and resourceInfo', () => {
for (const key in monitoringInterface) {
expect(index[key]).toBe(monitoringInterface[key])
}
for (const key in resourceInfo) {
expect(index[key]).toBe(resourceInfo[key])
}
})

View File

@ -1,2 +0,0 @@
export * from './monitoringInterface'
export * from './resourceInfo'

View File

@ -1,29 +0,0 @@
import { GpuSetting, OperatingSystemInfo } from '../miscellaneous'
/**
* Monitoring extension for system monitoring.
* @extends BaseExtension
*/
export interface MonitoringInterface {
/**
* Returns information about the system resources.
* @returns {Promise<any>} A promise that resolves with the system resources information.
*/
getResourcesInfo(): Promise<any>
/**
* Returns the current system load.
* @returns {Promise<any>} A promise that resolves with the current system load.
*/
getCurrentLoad(): Promise<any>
/**
* Returns the GPU configuration.
*/
getGpuSetting(): Promise<GpuSetting | undefined>
/**
* Returns information about the operating system.
*/
getOsInfo(): Promise<OperatingSystemInfo>
}

View File

@ -1,6 +0,0 @@
export type ResourceInfo = {
mem: {
totalMemory: number
usedMemory: number
}
}

View File

@ -55,7 +55,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Conversational", "productName": "Conversational",
"version": "1.0.0", "version": "1.0.0",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables conversations and state persistence via your filesystem", "description": "This extension enables conversations and state persistence via your filesystem.",
"url": "extension://@janhq/conversational-extension/dist/index.js" "url": "extension://@janhq/conversational-extension/dist/index.js"
}, },
"@janhq/inference-anthropic-extension": { "@janhq/inference-anthropic-extension": {
@ -70,7 +70,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Anthropic Inference Engine", "productName": "Anthropic Inference Engine",
"version": "1.0.2", "version": "1.0.2",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Anthropic chat completion API calls", "description": "This extension enables Anthropic chat completion API calls.",
"url": "extension://@janhq/inference-anthropic-extension/dist/index.js" "url": "extension://@janhq/inference-anthropic-extension/dist/index.js"
}, },
"@janhq/inference-triton-trt-llm-extension": { "@janhq/inference-triton-trt-llm-extension": {
@ -85,7 +85,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Triton-TRT-LLM Inference Engine", "productName": "Triton-TRT-LLM Inference Engine",
"version": "1.0.0", "version": "1.0.0",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option", "description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option.",
"url": "extension://@janhq/inference-triton-trt-llm-extension/dist/index.js" "url": "extension://@janhq/inference-triton-trt-llm-extension/dist/index.js"
}, },
"@janhq/inference-mistral-extension": { "@janhq/inference-mistral-extension": {
@ -100,7 +100,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "MistralAI Inference Engine", "productName": "MistralAI Inference Engine",
"version": "1.0.1", "version": "1.0.1",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Mistral chat completion API calls", "description": "This extension enables Mistral chat completion API calls.",
"url": "extension://@janhq/inference-mistral-extension/dist/index.js" "url": "extension://@janhq/inference-mistral-extension/dist/index.js"
}, },
"@janhq/inference-martian-extension": { "@janhq/inference-martian-extension": {
@ -115,7 +115,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Martian Inference Engine", "productName": "Martian Inference Engine",
"version": "1.0.1", "version": "1.0.1",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Martian chat completion API calls", "description": "This extension enables Martian chat completion API calls.",
"url": "extension://@janhq/inference-martian-extension/dist/index.js" "url": "extension://@janhq/inference-martian-extension/dist/index.js"
}, },
"@janhq/inference-openrouter-extension": { "@janhq/inference-openrouter-extension": {
@ -130,7 +130,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "OpenRouter Inference Engine", "productName": "OpenRouter Inference Engine",
"version": "1.0.0", "version": "1.0.0",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Open Router chat completion API calls", "description": "This extension enables Open Router chat completion API calls.",
"url": "extension://@janhq/inference-openrouter-extension/dist/index.js" "url": "extension://@janhq/inference-openrouter-extension/dist/index.js"
}, },
"@janhq/inference-nvidia-extension": { "@janhq/inference-nvidia-extension": {
@ -145,7 +145,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "NVIDIA NIM Inference Engine", "productName": "NVIDIA NIM Inference Engine",
"version": "1.0.1", "version": "1.0.1",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables NVIDIA chat completion API calls", "description": "This extension enables NVIDIA chat completion API calls.",
"url": "extension://@janhq/inference-nvidia-extension/dist/index.js" "url": "extension://@janhq/inference-nvidia-extension/dist/index.js"
}, },
"@janhq/inference-groq-extension": { "@janhq/inference-groq-extension": {
@ -160,7 +160,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Groq Inference Engine", "productName": "Groq Inference Engine",
"version": "1.0.1", "version": "1.0.1",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables fast Groq chat completion API calls", "description": "This extension enables fast Groq chat completion API calls.",
"url": "extension://@janhq/inference-groq-extension/dist/index.js" "url": "extension://@janhq/inference-groq-extension/dist/index.js"
}, },
"@janhq/inference-openai-extension": { "@janhq/inference-openai-extension": {
@ -175,7 +175,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "OpenAI Inference Engine", "productName": "OpenAI Inference Engine",
"version": "1.0.2", "version": "1.0.2",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables OpenAI chat completion API calls", "description": "This extension enables OpenAI chat completion API calls.",
"url": "extension://@janhq/inference-openai-extension/dist/index.js" "url": "extension://@janhq/inference-openai-extension/dist/index.js"
}, },
"@janhq/inference-cohere-extension": { "@janhq/inference-cohere-extension": {
@ -190,7 +190,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Cohere Inference Engine", "productName": "Cohere Inference Engine",
"version": "1.0.0", "version": "1.0.0",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Cohere chat completion API calls", "description": "This extension enables Cohere chat completion API calls.",
"url": "extension://@janhq/inference-cohere-extension/dist/index.js" "url": "extension://@janhq/inference-cohere-extension/dist/index.js"
}, },
"@janhq/model-extension": { "@janhq/model-extension": {
@ -205,7 +205,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Model Management", "productName": "Model Management",
"version": "1.0.33", "version": "1.0.33",
"main": "dist/index.js", "main": "dist/index.js",
"description": "Model Management Extension provides model exploration and seamless downloads", "description": "Model Management Extension provides model exploration and seamless downloads.",
"url": "extension://@janhq/model-extension/dist/index.js" "url": "extension://@janhq/model-extension/dist/index.js"
}, },
"@janhq/monitoring-extension": { "@janhq/monitoring-extension": {
@ -220,7 +220,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "System Monitoring", "productName": "System Monitoring",
"version": "1.0.10", "version": "1.0.10",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension provides system health and OS level data", "description": "This extension provides system health and OS level data.",
"url": "extension://@janhq/monitoring-extension/dist/index.js" "url": "extension://@janhq/monitoring-extension/dist/index.js"
}, },
"@janhq/assistant-extension": { "@janhq/assistant-extension": {
@ -235,7 +235,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Jan Assistant", "productName": "Jan Assistant",
"version": "1.0.1", "version": "1.0.1",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models", "description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models.",
"url": "extension://@janhq/assistant-extension/dist/index.js" "url": "extension://@janhq/assistant-extension/dist/index.js"
}, },
"@janhq/tensorrt-llm-extension": { "@janhq/tensorrt-llm-extension": {

View File

@ -47,8 +47,8 @@ To add a new remote engine:
|-------|-------------|----------| |-------|-------------|----------|
| Engine Name | Name for your engine (e.g., "OpenAI", "Claude") | ✓ | | Engine Name | Name for your engine (e.g., "OpenAI", "Claude") | ✓ |
| API URL | The base URL of the provider's API | ✓ | | API URL | The base URL of the provider's API | ✓ |
| API Key | Your authentication key from the provider | ✓ |
| API Key | Your authentication key to activate this engine | ✓ |
| Model List URL | URL for fetching available models | |
| Model List URL | The endpoint URL to fetch available models | |
| API Key Template | Custom authorization header format | | | API Key Template | Custom authorization header format | |
| Request Format Conversion | Function to convert Jan's request format to provider's format | | | Request Format Conversion | Function to convert Jan's request format to provider's format | |
| Response Format Conversion | Function to convert provider's response format to Jan's format | | | Response Format Conversion | Function to convert provider's response format to Jan's format | |
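Not part of this commit: a hypothetical minimal engine configuration illustrating roughly where those fields end up on disk. The field names follow the anthropic.json resource shown later in this diff; mapping the table's UI fields onto them (for example, API URL onto transform_req.chat_completions.url) is an assumption, and all values are placeholders.

{
  "id": "my-provider",
  "type": "remote",
  "engine": "my_provider",
  "url": "https://console.my-provider.example.com/keys",
  "transform_req": {
    "chat_completions": {
      "url": "https://api.my-provider.example.com/v1/chat/completions",
      "template": "{{ tojson(input_request) }}"
    }
  },
  "transform_resp": {
    "chat_completions": {
      "template": "{{ tojson(input_request) }}"
    }
  }
}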

View File

@ -28,6 +28,7 @@ import { setupReactDevTool } from './utils/dev'
import { trayManager } from './managers/tray' import { trayManager } from './managers/tray'
import { logSystemInfo } from './utils/system' import { logSystemInfo } from './utils/system'
import { registerGlobalShortcuts } from './utils/shortcut' import { registerGlobalShortcuts } from './utils/shortcut'
import { registerLogger } from './utils/logger'
const preloadPath = join(__dirname, 'preload.js') const preloadPath = join(__dirname, 'preload.js')
const rendererPath = join(__dirname, '..', 'renderer') const rendererPath = join(__dirname, '..', 'renderer')
@ -79,6 +80,7 @@ app
}) })
.then(setupCore) .then(setupCore)
.then(createUserSpace) .then(createUserSpace)
.then(registerLogger)
.then(migrate) .then(migrate)
.then(setupExtensions) .then(setupExtensions)
.then(setupMenu) .then(setupMenu)

View File

@ -1,6 +1,6 @@
{ {
"name": "jan", "name": "jan",
"version": "0.1.4", "version": "0.1.1737985524",
"main": "./build/main.js", "main": "./build/main.js",
"author": "Jan <service@jan.ai>", "author": "Jan <service@jan.ai>",
"license": "MIT", "license": "MIT",

View File

@ -1,16 +1,28 @@
import fs from 'fs'
import {
createWriteStream,
existsSync,
mkdirSync,
readdir,
stat,
unlink,
writeFileSync,
} from 'fs'
import util from 'util' import util from 'util'
import { import {
getAppConfigurations, getAppConfigurations,
getJanDataFolderPath, getJanDataFolderPath,
Logger, Logger,
LoggerManager,
} from '@janhq/core/node' } from '@janhq/core/node'
import path, { join } from 'path' import path, { join } from 'path'
export class FileLogger extends Logger {
/**
* File Logger
*/
export class FileLogger implements Logger {
name = 'file' name = 'file'
logCleaningInterval: number = 120000 logCleaningInterval: number = 120000
timeout: NodeJS.Timeout | null = null
timeout: NodeJS.Timeout | undefined
appLogPath: string = './' appLogPath: string = './'
logEnabled: boolean = true logEnabled: boolean = true
@ -18,14 +30,13 @@ export class FileLogger extends Logger {
logEnabled: boolean = true, logEnabled: boolean = true,
logCleaningInterval: number = 120000 logCleaningInterval: number = 120000
) { ) {
super()
this.logEnabled = logEnabled this.logEnabled = logEnabled
if (logCleaningInterval) this.logCleaningInterval = logCleaningInterval if (logCleaningInterval) this.logCleaningInterval = logCleaningInterval
const appConfigurations = getAppConfigurations() const appConfigurations = getAppConfigurations()
const logFolderPath = join(appConfigurations.data_folder, 'logs') const logFolderPath = join(appConfigurations.data_folder, 'logs')
if (!fs.existsSync(logFolderPath)) { if (!existsSync(logFolderPath)) {
fs.mkdirSync(logFolderPath, { recursive: true }) mkdirSync(logFolderPath, { recursive: true })
} }
this.appLogPath = join(logFolderPath, 'app.log') this.appLogPath = join(logFolderPath, 'app.log')
@ -69,8 +80,8 @@ export class FileLogger extends Logger {
const logDirectory = path.join(getJanDataFolderPath(), 'logs') const logDirectory = path.join(getJanDataFolderPath(), 'logs')
// Perform log cleaning // Perform log cleaning
const currentDate = new Date() const currentDate = new Date()
if (fs.existsSync(logDirectory)) if (existsSync(logDirectory))
fs.readdir(logDirectory, (err, files) => { readdir(logDirectory, (err, files) => {
if (err) { if (err) {
console.error('Error reading log directory:', err) console.error('Error reading log directory:', err)
return return
@ -78,7 +89,7 @@ export class FileLogger extends Logger {
files.forEach((file) => { files.forEach((file) => {
const filePath = path.join(logDirectory, file) const filePath = path.join(logDirectory, file)
fs.stat(filePath, (err, stats) => { stat(filePath, (err, stats) => {
if (err) { if (err) {
console.error('Error getting file stats:', err) console.error('Error getting file stats:', err)
return return
@ -86,7 +97,7 @@ export class FileLogger extends Logger {
// Check size // Check size
if (stats.size > size) { if (stats.size > size) {
fs.unlink(filePath, (err) => { unlink(filePath, (err) => {
if (err) { if (err) {
console.error('Error deleting log file:', err) console.error('Error deleting log file:', err)
return return
@ -103,7 +114,7 @@ export class FileLogger extends Logger {
(1000 * 3600 * 24) (1000 * 3600 * 24)
) )
if (daysDifference > days) { if (daysDifference > days) {
fs.unlink(filePath, (err) => { unlink(filePath, (err) => {
if (err) { if (err) {
console.error('Error deleting log file:', err) console.error('Error deleting log file:', err)
return return
@ -124,15 +135,20 @@ export class FileLogger extends Logger {
} }
} }
/**
* Write log function implementation
* @param message
* @param logPath
*/
const writeLog = (message: string, logPath: string) => { const writeLog = (message: string, logPath: string) => {
if (!fs.existsSync(logPath)) { if (!existsSync(logPath)) {
const logDirectory = path.join(getJanDataFolderPath(), 'logs') const logDirectory = path.join(getJanDataFolderPath(), 'logs')
if (!fs.existsSync(logDirectory)) { if (!existsSync(logDirectory)) {
fs.mkdirSync(logDirectory) mkdirSync(logDirectory)
} }
fs.writeFileSync(logPath, message) writeFileSync(logPath, message)
} else { } else {
const logFile = fs.createWriteStream(logPath, { const logFile = createWriteStream(logPath, {
flags: 'a', flags: 'a',
}) })
logFile.write(util.format(message) + '\n') logFile.write(util.format(message) + '\n')
@ -140,3 +156,12 @@ const writeLog = (message: string, logPath: string) => {
console.debug(message) console.debug(message)
} }
} }
/**
* Register logger for global application logging
*/
export const registerLogger = () => {
const logger = new FileLogger()
LoggerManager.instance().register(logger)
logger.cleanLogs()
}

View File

@ -6,6 +6,8 @@ import groq from './resources/groq.json' with { type: 'json' }
import martian from './resources/martian.json' with { type: 'json' } import martian from './resources/martian.json' with { type: 'json' }
import mistral from './resources/mistral.json' with { type: 'json' } import mistral from './resources/mistral.json' with { type: 'json' }
import nvidia from './resources/nvidia.json' with { type: 'json' } import nvidia from './resources/nvidia.json' with { type: 'json' }
import deepseek from './resources/deepseek.json' with { type: 'json' }
import googleGemini from './resources/google_gemini.json' with { type: 'json' }
import anthropicModels from './models/anthropic.json' with { type: 'json' } import anthropicModels from './models/anthropic.json' with { type: 'json' }
import cohereModels from './models/cohere.json' with { type: 'json' } import cohereModels from './models/cohere.json' with { type: 'json' }
@ -15,6 +17,8 @@ import groqModels from './models/groq.json' with { type: 'json' }
import martianModels from './models/martian.json' with { type: 'json' } import martianModels from './models/martian.json' with { type: 'json' }
import mistralModels from './models/mistral.json' with { type: 'json' } import mistralModels from './models/mistral.json' with { type: 'json' }
import nvidiaModels from './models/nvidia.json' with { type: 'json' } import nvidiaModels from './models/nvidia.json' with { type: 'json' }
import deepseekModels from './models/deepseek.json' with { type: 'json' }
import googleGeminiModels from './models/google_gemini.json' with { type: 'json' }
const engines = [ const engines = [
anthropic, anthropic,
@ -25,6 +29,8 @@ const engines = [
mistral, mistral,
martian, martian,
nvidia, nvidia,
deepseek,
googleGemini,
] ]
const models = [ const models = [
...anthropicModels, ...anthropicModels,
@ -35,5 +41,7 @@ const models = [
...mistralModels, ...mistralModels,
...martianModels, ...martianModels,
...nvidiaModels, ...nvidiaModels,
...deepseekModels,
...googleGeminiModels,
] ]
export { engines, models } export { engines, models }

View File

@ -8,6 +8,7 @@
"inference_params": { "inference_params": {
"max_tokens": 4096, "max_tokens": 4096,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"stream": true "stream": true
}, },
"engine": "anthropic" "engine": "anthropic"
@ -21,6 +22,7 @@
"inference_params": { "inference_params": {
"max_tokens": 8192, "max_tokens": 8192,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"stream": true "stream": true
}, },
"engine": "anthropic" "engine": "anthropic"
@ -34,6 +36,7 @@
"inference_params": { "inference_params": {
"max_tokens": 8192, "max_tokens": 8192,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"stream": true "stream": true
}, },
"engine": "anthropic" "engine": "anthropic"

View File

@ -8,6 +8,7 @@
"inference_params": { "inference_params": {
"max_tokens": 4096, "max_tokens": 4096,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"stream": false "stream": false
}, },
"engine": "cohere" "engine": "cohere"
@ -21,6 +22,7 @@
"inference_params": { "inference_params": {
"max_tokens": 4096, "max_tokens": 4096,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"stream": false "stream": false
}, },
"engine": "cohere" "engine": "cohere"

View File

@ -0,0 +1,28 @@
[
{
"model": "deepseek-chat",
"object": "model",
"name": "DeepSeek Chat",
"version": "1.0",
"description": "The deepseek-chat model has been upgraded to DeepSeek-V3. deepseek-reasoner points to the new model DeepSeek-R1",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "deepseek"
},
{
"model": "deepseek-reasoner",
"object": "model",
"name": "DeepSeek R1",
"version": "1.0",
"description": "CoT (Chain of Thought) is the reasoning content deepseek-reasoner gives before output the final answer. For details, please refer to Reasoning Model.",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "deepseek"
}
]

View File

@ -0,0 +1,67 @@
[
{
"model": "gemini-2.0-flash",
"object": "model",
"name": "Gemini 2.0 Flash",
"version": "1.0",
"description": "Gemini 2.0 Flash delivers next-gen features and improved capabilities, including superior speed, native tool use, multimodal generation, and a 1M token context window.",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "google_gemini"
},
{
"model": "gemini-2.0-flash-lite-preview",
"object": "model",
"name": "Gemini 2.0 Flash-Lite Preview",
"version": "1.0",
"description": "A Gemini 2.0 Flash model optimized for cost efficiency and low latency.",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "google_gemini"
},
{
"model": "gemini-1.5-flash",
"object": "model",
"name": "Gemini 1.5 Flash",
"version": "1.0",
"description": "Gemini 1.5 Flash is a fast and versatile multimodal model for scaling across diverse tasks.",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "google_gemini"
},
{
"model": "gemini-1.5-flash-8b",
"object": "model",
"name": "Gemini 1.5 Flash-8B",
"version": "1.0",
"description": "Gemini 1.5 Flash-8B is a small model designed for lower intelligence tasks.",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "google_gemini"
},
{
"model": "gemini-1.5-pro",
"object": "model",
"name": "Gemini 1.5 Pro",
"version": "1.0",
"description": "Gemini 1.5 Pro is a mid-size multimodal model that is optimized for a wide-range of reasoning tasks. 1.5 Pro can process large amounts of data at once, including 2 hours of video, 19 hours of audio, codebases with 60,000 lines of code, or 2,000 pages of text. ",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "google_gemini"
}
]

View File

@ -8,6 +8,7 @@
"inference_params": { "inference_params": {
"max_tokens": 32000, "max_tokens": 32000,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"top_p": 0.95, "top_p": 0.95,
"stream": true "stream": true
}, },
@ -22,6 +23,7 @@
"inference_params": { "inference_params": {
"max_tokens": 32000, "max_tokens": 32000,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"top_p": 0.95, "top_p": 0.95,
"stream": true "stream": true
}, },
@ -36,6 +38,7 @@
"inference_params": { "inference_params": {
"max_tokens": 32000, "max_tokens": 32000,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"top_p": 0.95, "top_p": 0.95,
"stream": true "stream": true
}, },

View File

@ -8,6 +8,7 @@
"inference_params": { "inference_params": {
"max_tokens": 1024, "max_tokens": 1024,
"temperature": 0.3, "temperature": 0.3,
"max_temperature": 1.0,
"top_p": 1, "top_p": 1,
"stream": false, "stream": false,
"frequency_penalty": 0, "frequency_penalty": 0,

View File

@ -79,12 +79,7 @@
"description": "OpenAI o1 is a new model with complex reasoning", "description": "OpenAI o1 is a new model with complex reasoning",
"format": "api", "format": "api",
"inference_params": { "inference_params": {
"max_tokens": 100000, "max_tokens": 100000
"temperature": 1,
"top_p": 1,
"stream": true,
"frequency_penalty": 0,
"presence_penalty": 0
}, },
"engine": "openai" "engine": "openai"
}, },
@ -97,11 +92,7 @@
"format": "api", "format": "api",
"inference_params": { "inference_params": {
"max_tokens": 32768, "max_tokens": 32768,
"temperature": 1, "stream": true
"top_p": 1,
"stream": true,
"frequency_penalty": 0,
"presence_penalty": 0
}, },
"engine": "openai" "engine": "openai"
}, },
@ -114,11 +105,20 @@
"format": "api", "format": "api",
"inference_params": { "inference_params": {
"max_tokens": 65536, "max_tokens": 65536,
"temperature": 1, "stream": true
"top_p": 1, },
"stream": true, "engine": "openai"
"frequency_penalty": 0, },
"presence_penalty": 0 {
"model": "o3-mini",
"object": "model",
"name": "OpenAI o3-mini",
"version": "1.0",
"description": "OpenAI most recent reasoning model, providing high intelligence at the same cost and latency targets of o1-mini.",
"format": "api",
"inference_params": {
"max_tokens": 100000,
"stream": true
}, },
"engine": "openai" "engine": "openai"
} }

View File

@ -1,16 +1,91 @@
[ [
{ {
"model": "open-router-auto", "model": "deepseek/deepseek-r1:free",
"object": "model", "object": "model",
"name": "OpenRouter", "name": "DeepSeek: R1",
"version": "1.0", "version": "1.0",
"description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.", "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": { "inference_params": {
"max_tokens": 128000,
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0,
"stream": true
},
"engine": "openrouter"
},
{
"model": "deepseek/deepseek-r1-distill-llama-70b:free",
"object": "model",
"name": "DeepSeek: R1 Distill Llama 70B",
"version": "1.0",
"description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true
},
"engine": "openrouter"
},
{
"model": "deepseek/deepseek-r1-distill-llama-70b:free",
"object": "model",
"name": "DeepSeek: R1 Distill Llama 70B",
"version": "1.0",
"description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true
},
"engine": "openrouter"
},
{
"model": "meta-llama/llama-3.1-405b-instruct:free",
"object": "model",
"name": "Meta: Llama 3.1 405B Instruct",
"version": "1.0",
"description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true
},
"engine": "openrouter"
},
{
"model": "qwen/qwen-vl-plus:free",
"object": "model",
"name": "Qwen: Qwen VL Plus",
"version": "1.0",
"description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true
},
"engine": "openrouter"
},
{
"model": "qwen/qwen2.5-vl-72b-instruct:free",
"object": "model",
"name": "Qwen: Qwen2.5 VL 72B Instruct",
"version": "1.0",
"description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true
}, },
"engine": "openrouter" "engine": "openrouter"
} }

View File

@ -29,12 +29,10 @@
}, },
"dependencies": { "dependencies": {
"@janhq/core": "../../core/package.tgz", "@janhq/core": "../../core/package.tgz",
"cpu-instructions": "^0.0.13",
"ky": "^1.7.2", "ky": "^1.7.2",
"p-queue": "^8.0.1" "p-queue": "^8.0.1"
}, },
"bundledDependencies": [ "bundledDependencies": [
"cpu-instructions",
"@janhq/core" "@janhq/core"
], ],
"engines": { "engines": {

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-anthropic-extension", "id": "anthropic",
"type": "remote", "type": "remote",
"engine": "anthropic", "engine": "anthropic",
"url": "https://console.anthropic.com/settings/keys", "url": "https://console.anthropic.com/settings/keys",
@ -10,13 +10,14 @@
"transform_req": { "transform_req": {
"chat_completions": { "chat_completions": {
"url": "https://api.anthropic.com/v1/messages", "url": "https://api.anthropic.com/v1/messages",
"template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": \"{{ input_request.messages.0.content }}\", \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": \"{{ message.role}}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }" "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": {{ tojson(input_request.messages.0.content) }}, \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"metadata\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
} }
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": \"{{ input_request.model }}\", \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"{{ input_request.role }}\", \"content\": {% if input_request.content and input_request.content.0.type == \"text\" %} \"{{input_request.content.0.text}}\" {% else %} null {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.stop_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.usage.input_tokens }}, \"completion_tokens\": {{ input_request.usage.output_tokens }}, \"total_tokens\": {{ input_request.usage.input_tokens + input_request.usage.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 }, \"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}" "template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {{tojson(input_request)}} {% endif %}"
}
} }
},
"explore_models_url": "https://docs.anthropic.com/en/docs/about-claude/models"
} }
} }
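For reference, the updated transform_req template above maps an OpenAI-style chat request onto Anthropic's Messages API: a leading system message is lifted into a top-level "system" field, message roles and contents are now serialized with tojson, empty message content is replaced with "." (the API rejects empty strings), and other whitelisted keys (model, max_tokens, stream, temperature, tools, ...) are copied through unchanged. A minimal TypeScript sketch of the same mapping; the types and function name here are illustrative, not the extension's actual code:

interface ChatMessage { role: string; content: string }
interface ChatRequest { model: string; messages: ChatMessage[]; max_tokens?: number; stream?: boolean }

// Sketch only: mirrors what the inja template above produces for the request body.
function toAnthropicBody(req: ChatRequest) {
  const [first, ...rest] = req.messages
  const hasSystem = first?.role === 'system'
  // Anthropic rejects empty message content, so fall back to "." like the template does.
  const safeContent = (m: ChatMessage) => (m.content && m.content !== '' ? m.content : '.')
  return {
    ...(hasSystem ? { system: first.content } : {}),
    messages: (hasSystem ? rest : req.messages).map((m) => ({
      role: m.role,
      content: safeContent(m),
    })),
    model: req.model,
    max_tokens: req.max_tokens,
    stream: req.stream,
  }
}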

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-cohere-extension", "id": "cohere",
"type": "remote", "type": "remote",
"engine": "cohere", "engine": "cohere",
"url": "https://dashboard.cohere.com/api-keys", "url": "https://dashboard.cohere.com/api-keys",
@ -10,13 +10,14 @@
"transform_req": { "transform_req": {
"chat_completions": { "chat_completions": {
"url": "https://api.cohere.ai/v1/chat", "url": "https://api.cohere.ai/v1/chat",
"template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"preamble\": \"{{ input_request.messages.0.content }}\", {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_first and not loop.is_last %} {\"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}], {% endif %} \"message\": \"{{ last(input_request.messages).content }}\" {% else %} {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_last %} { \"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}],{% endif %}\"message\": \"{{ last(input_request.messages).content }}\" {% endif %}{% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} \"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }" "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"preamble\": {{ tojson(input_request.messages.0.content) }}, {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_first and not loop.is_last %} {\"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}], {% endif %} \"message\": \"{{ last(input_request.messages).content }}\" {% else %} {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_last %} { \"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}],{% endif %}\"message\": \"{{ last(input_request.messages).content }}\" {% endif %}{% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or 
key == \"parallel_tool_calls\" or key == \"user\" %} \"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
} }
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.event_type == \"text-generation\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.text }}\" {% else %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.event_type == \"stream-end\" %} \"finish_reason\": \"{{ input_request.finish_reason }}\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.generation_id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": {% if input_request.model %} \"{{ input_request.model }}\" {% else %} \"command-r-plus-08-2024\" {% endif %}, \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"assistant\", \"content\": {% if not input_request.text %} null {% else %} \"{{ input_request.text }}\" {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.finish_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.meta.tokens.input_tokens }}, \"completion_tokens\": {{ input_request.meta.tokens.output_tokens }},\"total_tokens\": {{ input_request.meta.tokens.input_tokens + input_request.meta.tokens.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 },\"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}" "template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.event_type == \"text-generation\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.text }}\" {% else %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.event_type == \"stream-end\" %} \"finish_reason\": \"{{ input_request.finish_reason }}\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.generation_id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": {% if input_request.model %} \"{{ input_request.model }}\" {% else %} \"command-r-plus-08-2024\" {% endif %}, \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"assistant\", \"content\": {% if not input_request.text %} null {% else %} \"{{ input_request.text }}\" {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.finish_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.meta.tokens.input_tokens }}, \"completion_tokens\": {{ input_request.meta.tokens.output_tokens }},\"total_tokens\": {{ input_request.meta.tokens.input_tokens + input_request.meta.tokens.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 },\"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}"
} }
} },
"explore_models_url": "https://docs.cohere.com/v2/docs/models"
} }
} }

View File

@ -0,0 +1,23 @@
{
"id": "deepseek",
"type": "remote",
"engine": "deepseek",
"url": "https://platform.deepseek.com/api_keys",
"api_key": "",
"metadata": {
"get_models_url": "https://api.deepseek.com/models",
"header_template": "Authorization: Bearer {{api_key}}",
"transform_req": {
"chat_completions": {
"url": "https://api.deepseek.com/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://api-docs.deepseek.com/quick_start/pricing"
}
}
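The new DeepSeek entry (like the Gemini one below) only supplies endpoint URLs and a bearer-token header template; the request template whitelists standard OpenAI-compatible fields and the response body is passed through as-is with tojson. A rough sketch of how such an entry could be turned into an HTTP call; the render helper stands in for the real template engine and is an assumption, not the shipped code:

// Minimal {{placeholder}} substitution standing in for the real template engine (assumption).
function render(template: string, vars: Record<string, string>): string {
  return template.replace(/\{\{\s*(\w+)\s*\}\}/g, (_, key: string) => vars[key] ?? '')
}

// Sketch: send a chat completion using the metadata fields of the engine entry above.
async function sendChat(apiKey: string, body: unknown): Promise<Response> {
  const header = render('Authorization: Bearer {{api_key}}', { api_key: apiKey })
  const [name, value] = header.split(': ')
  return fetch('https://api.deepseek.com/chat/completions', {
    method: 'POST',
    headers: { [name]: value, 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
  })
}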

View File

@ -0,0 +1,23 @@
{
"id": "google_gemini",
"type": "remote",
"engine": "google_gemini",
"url": "https://aistudio.google.com/apikey",
"api_key": "",
"metadata": {
"get_models_url": "https://generativelanguage.googleapis.com/v1beta/models",
"header_template": "Authorization: Bearer {{api_key}}",
"transform_req": {
"chat_completions": {
"url": "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://ai.google.dev/gemini-api/docs/models/gemini"
}
}

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-groq-extension", "id": "groq",
"type": "remote", "type": "remote",
"engine": "groq", "engine": "groq",
"url": "https://console.groq.com/keys", "url": "https://console.groq.com/keys",
@ -15,8 +15,9 @@
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{{tojson(input_request)}}"
}
} }
},
"explore_models_url": "https://console.groq.com/docs/models"
} }
} }

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-martian-extension", "id": "martian",
"type": "remote", "type": "remote",
"engine": "martian", "engine": "martian",
"url": "https://withmartian.com/dashboard", "url": "https://withmartian.com/dashboard",
@ -15,8 +15,9 @@
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{{tojson(input_request)}}"
}
} }
},
"explore_models_url": "https://withmartian.github.io/llm-adapters/"
} }
} }

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-mistral-extension", "id": "mistral",
"type": "remote", "type": "remote",
"engine": "mistral", "engine": "mistral",
"url": "https://console.mistral.ai/api-keys/", "url": "https://console.mistral.ai/api-keys/",
@ -17,6 +17,7 @@
"chat_completions": { "chat_completions": {
"template": "{{tojson(input_request)}}" "template": "{{tojson(input_request)}}"
} }
} },
"explore_models_url": "https://docs.mistral.ai/getting-started/models/models_overview/"
} }
} }

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-nvidia-extension", "id": "nvidia",
"type": "remote", "type": "remote",
"engine": "nvidia", "engine": "nvidia",
"url": "https://org.ngc.nvidia.com/setup/personal-keys", "url": "https://org.ngc.nvidia.com/setup/personal-keys",
@ -15,8 +15,9 @@
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{{tojson(input_request)}}"
}
} }
},
"explore_models_url": "https://build.nvidia.com/models"
} }
} }

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-openai-extension", "id": "openai",
"type": "remote", "type": "remote",
"engine": "openai", "engine": "openai",
"url": "https://platform.openai.com/account/api-keys", "url": "https://platform.openai.com/account/api-keys",
@ -10,13 +10,14 @@
"transform_req": { "transform_req": {
"chat_completions": { "chat_completions": {
"url": "https://api.openai.com/v1/chat/completions", "url": "https://api.openai.com/v1/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or ((input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and (key == \"stop\")) %} {% if not first %} , {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %} , {% endif %} {% endif %} {% endfor %}] {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }" "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or ((input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") and (key == \"stop\")) %} {% if not first %} , {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %} , {% endif %} {% endif %} {% endfor %}] {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% set first = false %} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }"
} }
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{{tojson(input_request)}}"
}
} }
},
"explore_models_url": "https://platform.openai.com/docs/models"
} }
} }
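The OpenAI request template now extends the o-series handling: for o1, o1-preview, o1-mini, o3 and o3-mini the max_tokens field is emitted as max_completion_tokens (and the stop key is only forwarded for those models), while the o1 family still drops a leading system message. The equivalent logic written out as plain TypeScript for readability; this is an illustration of the template's behavior, not code that ships:

const O1_FAMILY = ['o1', 'o1-preview', 'o1-mini']
const MAX_COMPLETION_TOKEN_MODELS = [...O1_FAMILY, 'o3', 'o3-mini']

interface OpenAIChatRequest {
  model: string
  max_tokens?: number
  messages: { role: string; content: string }[]
  [key: string]: unknown
}

// Sketch of the remapping the template above performs before the request is sent.
function adaptForOpenAI(req: OpenAIChatRequest): Record<string, unknown> {
  const out: Record<string, unknown> = { ...req }
  if (MAX_COMPLETION_TOKEN_MODELS.includes(req.model) && req.max_tokens !== undefined) {
    out.max_completion_tokens = req.max_tokens
    delete out.max_tokens
  }
  if (O1_FAMILY.includes(req.model) && req.messages[0]?.role === 'system') {
    // o1-family requests do not accept a system role, so the template drops it.
    out.messages = req.messages.slice(1)
  }
  return out
}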

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-openrouter-extension", "id": "openrouter",
"type": "remote", "type": "remote",
"engine": "openrouter", "engine": "openrouter",
"url": "https://openrouter.ai/keys", "url": "https://openrouter.ai/keys",
@ -10,13 +10,14 @@
"transform_req": { "transform_req": {
"chat_completions": { "chat_completions": {
"url": "https://openrouter.ai/api/v1/chat/completions", "url": "https://openrouter.ai/api/v1/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
} }
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{{tojson(input_request)}}"
}
} }
},
"explore_models_url": "https://openrouter.ai/models"
} }
} }

View File

@ -13,9 +13,19 @@ export default defineConfig([
NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`), NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`),
API_URL: JSON.stringify('http://127.0.0.1:39291'), API_URL: JSON.stringify('http://127.0.0.1:39291'),
SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
PLATFORM: JSON.stringify(process.platform),
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'), CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'),
DEFAULT_REMOTE_ENGINES: JSON.stringify(engines), DEFAULT_REMOTE_ENGINES: JSON.stringify(engines),
DEFAULT_REMOTE_MODELS: JSON.stringify(models), DEFAULT_REMOTE_MODELS: JSON.stringify(models),
DEFAULT_REQUEST_PAYLOAD_TRANSFORM: JSON.stringify(
`{ {% set first = true %} {% for key, value in input_request %} {% if key == "messages" or key == "model" or key == "temperature" or key == "store" or key == "max_tokens" or key == "stream" or key == "presence_penalty" or key == "metadata" or key == "frequency_penalty" or key == "tools" or key == "tool_choice" or key == "logprobs" or key == "top_logprobs" or key == "logit_bias" or key == "n" or key == "modalities" or key == "prediction" or key == "response_format" or key == "service_tier" or key == "seed" or key == "stop" or key == "stream_options" or key == "top_p" or key == "parallel_tool_calls" or key == "user" %} {% if not first %},{% endif %} "{{ key }}": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }`
),
DEFAULT_RESPONSE_BODY_TRANSFORM: JSON.stringify(
'{{tojson(input_request)}}'
),
DEFAULT_REQUEST_HEADERS_TRANSFORM: JSON.stringify(
'Authorization: Bearer {{api_key}}'
),
}, },
}, },
{ {
@ -29,15 +39,4 @@ export default defineConfig([
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'), CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'),
}, },
}, },
{
input: 'src/node/cpuInfo.ts',
output: {
format: 'cjs',
file: 'dist/node/cpuInfo.js',
},
external: ['cpu-instructions'],
resolve: {
extensions: ['.ts', '.js', '.svg'],
},
},
]) ])
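The build config also gains three DEFAULT_*_TRANSFORM constants and a PLATFORM constant. rolldown's define option substitutes these bare identifiers at bundle time (they are declared for the type checker in the .d.ts change below), so the extension can fall back to a generic OpenAI-compatible payload template, a pass-through response template, and a bearer-token header when a remote engine config omits them; the cpuInfo child-process bundle is no longer built. A tiny sketch of how such a define-injected constant reads in source (illustrative only, with a hypothetical helper):

// At build time rolldown textually replaces this identifier with the JSON string
// configured in `define`, so at runtime it behaves like an ordinary constant.
declare const DEFAULT_REQUEST_HEADERS_TRANSFORM: string // e.g. 'Authorization: Bearer {{api_key}}'

// Hypothetical helper: fill the {{api_key}} placeholder from the default template.
function defaultAuthHeader(apiKey: string): string {
  return DEFAULT_REQUEST_HEADERS_TRANSFORM.replace('{{api_key}}', apiKey)
}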

View File

@ -1,7 +1,11 @@
declare const API_URL: string declare const API_URL: string
declare const CORTEX_ENGINE_VERSION: string declare const CORTEX_ENGINE_VERSION: string
declare const PLATFORM: string
declare const SOCKET_URL: string declare const SOCKET_URL: string
declare const NODE: string declare const NODE: string
declare const DEFAULT_REQUEST_PAYLOAD_TRANSFORM: string
declare const DEFAULT_RESPONSE_BODY_TRANSFORM: string
declare const DEFAULT_REQUEST_HEADERS_TRANSFORM: string
declare const DEFAULT_REMOTE_ENGINES: ({ declare const DEFAULT_REMOTE_ENGINES: ({
id: string id: string

View File

@ -19,12 +19,16 @@ import ky, { HTTPError } from 'ky'
import PQueue from 'p-queue' import PQueue from 'p-queue'
import { EngineError } from './error' import { EngineError } from './error'
import { getJanDataFolderPath } from '@janhq/core' import { getJanDataFolderPath } from '@janhq/core'
import { engineVariant } from './utils'
interface ModelList {
data: Model[]
}
/** /**
* JSONEngineManagementExtension is a EngineManagementExtension implementation that provides * JanEngineManagementExtension is a EngineManagementExtension implementation that provides
* functionality for managing engines. * functionality for managing engines.
*/ */
export default class JSONEngineManagementExtension extends EngineManagementExtension { export default class JanEngineManagementExtension extends EngineManagementExtension {
queue = new PQueue({ concurrency: 1 }) queue = new PQueue({ concurrency: 1 })
/** /**
@ -63,13 +67,12 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
* @returns A Promise that resolves to an object of list engines. * @returns A Promise that resolves to an object of list engines.
*/ */
async getRemoteModels(name: string): Promise<any> { async getRemoteModels(name: string): Promise<any> {
return this.queue.add(() => return ky
ky
.get(`${API_URL}/v1/models/remote/${name}`) .get(`${API_URL}/v1/models/remote/${name}`)
.json<Model[]>() .json<ModelList>()
.then((e) => e) .catch(() => ({
.catch(() => []) data: [],
) as Promise<Model[]> })) as Promise<ModelList>
} }
/** /**
@ -138,9 +141,38 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
* Add a new remote engine * Add a new remote engine
* @returns A Promise that resolves to intall of engine. * @returns A Promise that resolves to intall of engine.
*/ */
async addRemoteEngine(engineConfig: EngineConfig) { async addRemoteEngine(
engineConfig: EngineConfig,
persistModels: boolean = true
) {
// Populate default settings
if (
engineConfig.metadata?.transform_req?.chat_completions &&
!engineConfig.metadata.transform_req.chat_completions.template
)
engineConfig.metadata.transform_req.chat_completions.template =
DEFAULT_REQUEST_PAYLOAD_TRANSFORM
if (
engineConfig.metadata?.transform_resp?.chat_completions &&
!engineConfig.metadata.transform_resp.chat_completions?.template
)
engineConfig.metadata.transform_resp.chat_completions.template =
DEFAULT_RESPONSE_BODY_TRANSFORM
if (engineConfig.metadata && !engineConfig.metadata?.header_template)
engineConfig.metadata.header_template = DEFAULT_REQUEST_HEADERS_TRANSFORM
return this.queue.add(() => return this.queue.add(() =>
ky.post(`${API_URL}/v1/engines`, { json: engineConfig }).then((e) => e) ky.post(`${API_URL}/v1/engines`, { json: engineConfig }).then((e) => {
if (persistModels && engineConfig.metadata?.get_models_url) {
// Pull /models from remote models endpoint
return this.populateRemoteModels(engineConfig)
.then(() => e)
.catch(() => e)
}
return e
})
) as Promise<{ messages: string }> ) as Promise<{ messages: string }>
} }
@ -161,9 +193,25 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
* @param model - Remote model object. * @param model - Remote model object.
*/ */
async addRemoteModel(model: Model) { async addRemoteModel(model: Model) {
return this.queue.add(() => return this.queue
ky.post(`${API_URL}/v1/models/add`, { json: model }).then((e) => e) .add(() =>
ky
.post(`${API_URL}/v1/models/add`, {
json: {
inference_params: {
max_tokens: 4096,
temperature: 0.7,
top_p: 0.95,
stream: true,
frequency_penalty: 0,
presence_penalty: 0,
},
...model,
},
})
.then((e) => e)
) )
.then(() => {})
} }
/** /**
@ -245,11 +293,7 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
error instanceof EngineError error instanceof EngineError
) { ) {
const systemInfo = await systemInformation() const systemInfo = await systemInformation()
const variant = await executeOnMain( const variant = await engineVariant(systemInfo.gpuSetting)
NODE,
'engineVariant',
systemInfo.gpuSetting
)
await this.setDefaultEngineVariant(InferenceEngine.cortex_llamacpp, { await this.setDefaultEngineVariant(InferenceEngine.cortex_llamacpp, {
variant: variant, variant: variant,
version: `${CORTEX_ENGINE_VERSION}`, version: `${CORTEX_ENGINE_VERSION}`,
@ -293,14 +337,40 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
data.api_key = api_key data.api_key = api_key
/// END - Migrate legacy api key settings /// END - Migrate legacy api key settings
await this.addRemoteEngine(data).catch(console.error) await this.addRemoteEngine(data, false).catch(console.error)
}) })
) )
events.emit(EngineEvent.OnEngineUpdate, {}) events.emit(EngineEvent.OnEngineUpdate, {})
DEFAULT_REMOTE_MODELS.forEach(async (data: Model) => { await Promise.all(
await this.addRemoteModel(data).catch(() => {}) DEFAULT_REMOTE_MODELS.map((data: Model) =>
}) this.addRemoteModel(data).catch(() => {})
)
)
events.emit(ModelEvent.OnModelsUpdate, { fetch: true }) events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
} }
} }
/**
 * Pulls the models list from the remote provider and persists it
* @param engineConfig
* @returns
*/
private populateRemoteModels = async (engineConfig: EngineConfig) => {
return this.getRemoteModels(engineConfig.engine)
.then((models: ModelList) => {
if (models?.data)
Promise.all(
models.data.map((model) =>
this.addRemoteModel({
...model,
engine: engineConfig.engine as InferenceEngine,
model: model.model ?? model.id,
}).catch(console.info)
)
).then(() => {
events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
})
})
.catch(console.info)
}
} }
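Taken together, the changes above mean addRemoteEngine now back-fills any missing chat-completions request/response templates and header template with the injected defaults, and, unless persistModels is false (as in the legacy-settings migration path), immediately pulls the provider's model list via getRemoteModels and persists each entry with baseline inference parameters before emitting OnModelsUpdate. A hedged usage sketch; the provider URL and config values are made up, and the EngineConfig field names follow the engine.json files shown earlier:

import type JanEngineManagementExtension from './index' // path assumed for the sketch

async function registerExampleProvider(ext: JanEngineManagementExtension) {
  // Templates are intentionally omitted: addRemoteEngine fills them in from
  // DEFAULT_REQUEST_PAYLOAD_TRANSFORM / DEFAULT_RESPONSE_BODY_TRANSFORM /
  // DEFAULT_REQUEST_HEADERS_TRANSFORM before posting to /v1/engines.
  await ext.addRemoteEngine({
    engine: 'example-provider',
    type: 'remote',
    api_key: 'sk-example',
    metadata: {
      get_models_url: 'https://api.example.com/v1/models',
      transform_req: { chat_completions: { url: 'https://api.example.com/v1/chat/completions' } },
      transform_resp: { chat_completions: {} },
    },
  } as any) // cast: the exact EngineConfig shape lives in @janhq/core
  // Because get_models_url is set and persistModels defaults to true, the
  // provider's models are fetched and added with default inference_params.
}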

View File

@ -1,27 +0,0 @@
import { cpuInfo } from 'cpu-instructions'
// Check the CPU info and determine the supported instruction set
const info = cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512')
? 'avx512'
: cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX2')
? 'avx2'
: cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX')
? 'avx'
: 'noavx'
// Send the result and wait for confirmation before exiting
new Promise<void>((resolve, reject) => {
// @ts-ignore
process.send(info, (error: Error | null) => {
if (error) {
reject(error)
} else {
resolve()
}
})
})
.then(() => process.exit(0))
.catch((error) => {
console.error('Failed to send info:', error)
process.exit(1)
})

View File

@ -1,7 +1,6 @@
import { describe, expect, it } from '@jest/globals' import { describe, expect, it } from '@jest/globals'
import engine from './index' import engine from './index'
import { GpuSetting } from '@janhq/core/node' import { GpuSetting } from '@janhq/core'
import { cpuInfo } from 'cpu-instructions'
import { fork } from 'child_process' import { fork } from 'child_process'
let testSettings: GpuSetting = { let testSettings: GpuSetting = {
@ -23,22 +22,12 @@ let testSettings: GpuSetting = {
} }
const originalPlatform = process.platform const originalPlatform = process.platform
jest.mock('cpu-instructions', () => ({
cpuInfo: {
cpuInfo: jest.fn(),
},
}))
let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock
mockCpuInfo.mockReturnValue([])
jest.mock('@janhq/core/node', () => ({
jest.mock('@janhq/core', () => ({
appResourcePath: () => '.', appResourcePath: () => '.',
log: jest.fn(), log: jest.fn(),
})) }))
jest.mock('child_process', () => ({
fork: jest.fn(),
}))
const mockFork = fork as jest.Mock
describe('test executable cortex file', () => { describe('test executable cortex file', () => {
afterAll(function () { afterAll(function () {
@ -48,14 +37,7 @@ describe('test executable cortex file', () => {
}) })
it('executes on MacOS', () => { it('executes on MacOS', () => {
const mockProcess = {
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
}
Object.defineProperty(process, 'platform', { Object.defineProperty(process, 'platform', {
value: 'darwin', value: 'darwin',
}) })
@ -63,7 +45,7 @@ describe('test executable cortex file', () => {
value: 'arm64', value: 'arm64',
}) })
mockFork.mockReturnValue(mockProcess)
expect(engine.engineVariant(testSettings)).resolves.toEqual('mac-arm64') expect(engine.engineVariant(testSettings)).resolves.toEqual('mac-arm64')
}) })
@ -83,7 +65,7 @@ describe('test executable cortex file', () => {
}), }),
send: jest.fn(), send: jest.fn(),
} }
mockFork.mockReturnValue(mockProcess)
Object.defineProperty(process, 'arch', { Object.defineProperty(process, 'arch', {
value: 'x64', value: 'x64',
}) })
@ -107,7 +89,6 @@ describe('test executable cortex file', () => {
}), }),
send: jest.fn(), send: jest.fn(),
} }
mockFork.mockReturnValue(mockProcess)
expect(engine.engineVariant()).resolves.toEqual('windows-amd64-avx') expect(engine.engineVariant()).resolves.toEqual('windows-amd64-avx')
}) })
@ -145,7 +126,6 @@ describe('test executable cortex file', () => {
}), }),
send: jest.fn(), send: jest.fn(),
} }
mockFork.mockReturnValue(mockProcess)
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-avx2-cuda-11-7' 'windows-amd64-avx2-cuda-11-7'
@ -176,26 +156,11 @@ describe('test executable cortex file', () => {
}, },
], ],
} }
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-noavx-cuda-12-0' 'windows-amd64-noavx-cuda-12-0'
) )
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx512')
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-avx2-cuda-12-0' 'windows-amd64-avx2-cuda-12-0'
) )
@ -209,14 +174,6 @@ describe('test executable cortex file', () => {
...testSettings, ...testSettings,
run_mode: 'cpu', run_mode: 'cpu',
} }
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
})
expect(engine.engineVariant()).resolves.toEqual('linux-amd64-noavx') expect(engine.engineVariant()).resolves.toEqual('linux-amd64-noavx')
}) })
@ -245,16 +202,6 @@ describe('test executable cortex file', () => {
}, },
], ],
} }
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx512')
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toBe( expect(engine.engineVariant(settings)).resolves.toBe(
'linux-amd64-avx2-cuda-11-7' 'linux-amd64-avx2-cuda-11-7'
) )
@ -284,14 +231,7 @@ describe('test executable cortex file', () => {
}, },
], ],
} }
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx2')
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
'linux-amd64-avx2-cuda-12-0' 'linux-amd64-avx2-cuda-12-0'
@ -310,15 +250,6 @@ describe('test executable cortex file', () => {
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => { cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-${instruction}` `linux-amd64-${instruction}`
) )
@ -335,14 +266,7 @@ describe('test executable cortex file', () => {
} }
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => { cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
`windows-amd64-${instruction}` `windows-amd64-${instruction}`
) )
@ -376,14 +300,7 @@ describe('test executable cortex file', () => {
} }
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => { cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
`windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0` `windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
) )
@ -417,14 +334,7 @@ describe('test executable cortex file', () => {
], ],
} }
cpuInstructions.forEach((instruction) => { cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0` `linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
) )
@ -459,14 +369,7 @@ describe('test executable cortex file', () => {
], ],
} }
cpuInstructions.forEach((instruction) => { cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-vulkan` `linux-amd64-vulkan`
) )

View File

@ -2,111 +2,10 @@ import * as path from 'path'
import { import {
appResourcePath, appResourcePath,
getJanDataFolderPath, getJanDataFolderPath,
GpuSetting,
log, log,
} from '@janhq/core/node' } from '@janhq/core/node'
import { fork } from 'child_process'
import { mkdir, readdir, symlink } from 'fs/promises' import { mkdir, readdir, symlink } from 'fs/promises'
/**
* The GPU runMode that will be set - either 'vulkan', 'cuda', or empty for cpu.
* @param settings
* @returns
*/
const gpuRunMode = (settings?: GpuSetting): string => {
if (process.platform === 'darwin')
// MacOS now has universal binaries
return ''
if (!settings) return ''
return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda'
}
/**
* The OS & architecture that the current process is running on.
* @returns win, mac-x64, mac-arm64, or linux
*/
const os = (): string => {
return process.platform === 'win32'
? 'windows-amd64'
: process.platform === 'darwin'
? process.arch === 'arm64'
? 'mac-arm64'
: 'mac-amd64'
: 'linux-amd64'
}
/**
* The CUDA version that will be set - either '11-7' or '12-0'.
* @param settings
* @returns
*/
const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
const isUsingCuda =
settings?.vulkan !== true &&
settings?.run_mode === 'gpu' &&
!os().includes('mac')
if (!isUsingCuda) return undefined
return settings?.cuda?.version === '11' ? '11-7' : '12-0'
}
/**
* The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
* @returns
*/
const cpuInstructions = async (): Promise<string> => {
if (process.platform === 'darwin') return ''
const child = fork(path.join(__dirname, './cpuInfo.js')) // Path to the child process file
return new Promise((resolve, reject) => {
child.on('message', (cpuInfo?: string) => {
resolve(cpuInfo ?? 'noavx')
child.kill() // Kill the child process after receiving the result
})
child.on('error', (err) => {
resolve('noavx')
child.kill()
})
child.on('exit', (code) => {
if (code !== 0) {
resolve('noavx')
child.kill()
}
})
})
}
/**
* Find which variant to run based on the current platform.
*/
const engineVariant = async (gpuSetting?: GpuSetting): Promise<string> => {
const cpuInstruction = await cpuInstructions()
log(`[CORTEX]: CPU instruction: ${cpuInstruction}`)
let engineVariant = [
os(),
gpuSetting?.vulkan
? 'vulkan'
: gpuRunMode(gpuSetting) !== 'cuda'
? // CPU mode - support all variants
cpuInstruction
: // GPU mode - packaged CUDA variants of avx2 and noavx
cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
? 'avx2'
: 'noavx',
gpuRunMode(gpuSetting),
cudaVersion(gpuSetting),
]
.filter((e) => !!e)
.join('-')
log(`[CORTEX]: Engine variant: ${engineVariant}`)
return engineVariant
}
/** /**
* Create symlink to each variant for the default bundled version * Create symlink to each variant for the default bundled version
@ -148,6 +47,5 @@ const symlinkEngines = async () => {
} }
export default { export default {
engineVariant,
symlinkEngines, symlinkEngines,
} }

View File

@ -0,0 +1,86 @@
import { GpuSetting, log } from '@janhq/core'
/**
 * The GPU run mode that will be set - either 'cuda' or empty for CPU (Vulkan is decided separately in engineVariant).
* @param settings
* @returns
*/
const gpuRunMode = (settings?: GpuSetting): string => {
return settings?.gpus?.some(
(gpu) =>
gpu.activated === true &&
gpu.additional_information &&
gpu.additional_information.driver_version
)
? 'cuda'
: ''
}
/**
* The OS & architecture that the current process is running on.
* @returns win, mac-x64, mac-arm64, or linux
*/
const os = (settings?: GpuSetting): string => {
return PLATFORM === 'win32'
? 'windows-amd64'
: PLATFORM === 'darwin'
? settings?.cpu?.arch === 'arm64'
? 'mac-arm64'
: 'mac-amd64'
: 'linux-amd64'
}
/**
* The CUDA version that will be set - either '11-7' or '12-0'.
* @param settings
* @returns
*/
const cudaVersion = (settings?: GpuSetting): '12-0' | '11-7' | undefined => {
const isUsingCuda =
settings?.vulkan !== true &&
settings?.gpus?.some((gpu) => gpu.activated === true) &&
!os().includes('mac')
if (!isUsingCuda) return undefined
// return settings?.cuda?.version === '11' ? '11-7' : '12-0'
return settings.gpus?.some((gpu) => gpu.version.includes('12'))
? '12-0'
: '11-7'
}
/**
* Find which variant to run based on the current platform.
*/
export const engineVariant = async (
gpuSetting?: GpuSetting
): Promise<string> => {
const platform = os(gpuSetting)
// There is no need to append the variant extension for mac
if (platform.startsWith('mac')) return platform
let engineVariant =
gpuSetting?.vulkan || gpuSetting?.gpus?.some((e) => !e.additional_information)
? [platform, 'vulkan']
: [
platform,
gpuRunMode(gpuSetting) === 'cuda' &&
(gpuSetting?.cpu?.instructions?.includes('avx2') ||
gpuSetting?.cpu?.instructions?.includes('avx512'))
? 'avx2'
: 'noavx',
gpuRunMode(gpuSetting),
cudaVersion(gpuSetting),
].filter(Boolean) // Remove any falsy values
let engineVariantString = engineVariant.join('-')
log(`[CORTEX]: Engine variant: ${engineVariantString}`)
return engineVariantString
}
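engineVariant now derives the llama.cpp variant purely from the GpuSetting snapshot (the PLATFORM define, CPU arch and instructions, activated GPUs and their reported CUDA major version) instead of forking the removed cpu-instructions probe. A small illustration of the expected mapping; the GpuSetting literal below is inferred from the code above and cast loosely, and the expected result assumes PLATFORM is 'win32':

import type { GpuSetting } from '@janhq/core'
import { engineVariant } from './utils' // path assumed

// Windows machine with an AVX2-capable CPU and one activated NVIDIA GPU on a CUDA 12 driver.
const exampleSetting = {
  vulkan: false,
  cpu: { arch: 'x64', instructions: ['avx2'] },
  gpus: [
    {
      activated: true,
      version: '12.0',
      additional_information: { driver_version: '535.104' },
    },
  ],
} as unknown as GpuSetting

// Expected: 'windows-amd64-avx2-cuda-12-0' (and plain 'mac-arm64' on Apple Silicon).
engineVariant(exampleSetting).then((variant) => console.log(variant))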

View File

@ -0,0 +1,5 @@
/** @type {import('ts-jest').JestConfigWithTsJest} */
module.exports = {
preset: 'ts-jest',
testEnvironment: 'node',
}

View File

@ -0,0 +1,48 @@
{
"name": "@janhq/hardware-management-extension",
"productName": "Hardware Management",
"version": "1.0.0",
"description": "Manages Better Hardware settings.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>",
"license": "MIT",
"scripts": {
"test": "jest",
"build": "rolldown -c rolldown.config.mjs",
"codesign:darwin": "../../.github/scripts/auto-sign.sh",
"codesign:win32:linux": "echo 'No codesigning required'",
"codesign": "run-script-os",
"build:publish": "rimraf *.tgz --glob || true && yarn build && yarn codesign && npm pack && cpx *.tgz ../../pre-install"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/module.js"
},
"devDependencies": {
"cpx": "^1.5.0",
"rimraf": "^3.0.2",
"rolldown": "^1.0.0-beta.1",
"run-script-os": "^1.1.6",
"ts-loader": "^9.5.0",
"typescript": "^5.3.3"
},
"dependencies": {
"@janhq/core": "../../core/package.tgz",
"cpu-instructions": "^0.0.13",
"ky": "^1.7.2",
"p-queue": "^8.0.1"
},
"bundledDependencies": [
"cpu-instructions",
"@janhq/core"
],
"hardwares": {
"node": ">=18.0.0"
},
"files": [
"dist/*",
"package.json",
"README.md"
]
}

View File

@ -0,0 +1,17 @@
import { defineConfig } from 'rolldown'
import pkgJson from './package.json' with { type: 'json' }
export default defineConfig([
{
input: 'src/index.ts',
output: {
format: 'esm',
file: 'dist/index.js',
},
define: {
NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`),
API_URL: JSON.stringify('http://127.0.0.1:39291'),
SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
},
},
])

View File

@ -0,0 +1,12 @@
declare const API_URL: string
declare const SOCKET_URL: string
declare const NODE: string
interface Core {
api: APIFunctions
events: EventEmitter
}
interface Window {
core?: Core | undefined
electronAPI?: any | undefined
}

View File

@ -0,0 +1,67 @@
import {
executeOnMain,
HardwareManagementExtension,
HardwareInformation,
} from '@janhq/core'
import ky from 'ky'
import PQueue from 'p-queue'
/**
* JSONHardwareManagementExtension is a HardwareManagementExtension implementation that provides
 * functionality for managing hardware.
*/
export default class JSONHardwareManagementExtension extends HardwareManagementExtension {
queue = new PQueue({ concurrency: 1 })
/**
* Called when the extension is loaded.
*/
async onLoad() {
// Run Healthcheck
this.queue.add(() => this.healthz())
}
/**
* Called when the extension is unloaded.
*/
onUnload() {}
/**
* Do health check on cortex.cpp
* @returns
*/
async healthz(): Promise<void> {
return ky
.get(`${API_URL}/healthz`, {
retry: { limit: 20, delay: () => 500, methods: ['get'] },
})
.then(() => {})
}
/**
 * @returns A Promise that resolves to the hardware information reported by cortex.cpp.
*/
async getHardware(): Promise<HardwareInformation> {
return this.queue.add(() =>
ky
.get(`${API_URL}/v1/hardware`)
.json<HardwareInformation>()
.then((e) => e)
) as Promise<HardwareInformation>
}
/**
 * @returns A Promise that resolves to the GPU activation result.
*/
async setAvtiveGpu(data: { gpus: number[] }): Promise<{
message: string
activated_gpus: number[]
}> {
return this.queue.add(() =>
ky.post(`${API_URL}/v1/hardware/activate`, { json: data }).then((e) => e)
) as Promise<{
message: string
activated_gpus: number[]
}>
}
}
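The new hardware extension is a thin ky client over cortex.cpp's /healthz, /v1/hardware and /v1/hardware/activate endpoints. A brief usage sketch; the caller obtains the loaded extension instance elsewhere, and the exact fields on HardwareInformation come from @janhq/core:

import type JSONHardwareManagementExtension from './index' // path assumed for the sketch

async function activateFirstGpu(ext: JSONHardwareManagementExtension) {
  const hardware = await ext.getHardware()
  console.log('hardware reported by cortex.cpp:', hardware)
  // Method name spelled as defined in the class above.
  return ext.setAvtiveGpu({ gpus: [0] })
}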

View File

@ -8,7 +8,9 @@
"forceConsistentCasingInFileNames": true, "forceConsistentCasingInFileNames": true,
"strict": false, "strict": false,
"skipLibCheck": true, "skipLibCheck": true,
"rootDir": "./src" "rootDir": "./src",
"resolveJsonModule": true
}, },
"include": ["./src"] "include": ["./src"],
"exclude": ["src/**/*.test.ts", "rolldown.config.mjs"]
} }

View File

@ -1 +1 @@
1.0.9-rc7 1.0.10

View File

@ -1,7 +1,7 @@
{ {
"name": "@janhq/inference-cortex-extension", "name": "@janhq/inference-cortex-extension",
"productName": "Cortex Inference Engine", "productName": "Cortex Inference Engine",
"version": "1.0.24", "version": "1.0.25",
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"main": "dist/index.js", "main": "dist/index.js",
"node": "dist/node/index.cjs.js", "node": "dist/node/index.cjs.js",

View File

@ -76,7 +76,7 @@
}, },
{ {
"key": "use_mmap", "key": "use_mmap",
"title": "MMAP", "title": "mmap",
"description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.", "description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.",
"controllerType": "checkbox", "controllerType": "checkbox",
"controllerProps": { "controllerProps": {

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf"
}
],
"id": "deepseek-r1-distill-llama-70b",
"object": "model",
"name": "DeepSeek R1 Distill Llama 70B Q4",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf",
"ngl": 81
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["70B", "Featured"],
"size": 42500000000
},
"engine": "llama-cpp"
}

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf"
}
],
"id": "deepseek-r1-distill-llama-8b",
"object": "model",
"name": "DeepSeek R1 Distill Llama 8B Q5",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf",
"ngl": 33
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["8B", "Featured"],
"size": 5730000000
},
"engine": "llama-cpp"
}

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-1.5b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 1.5B Q5",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf",
"ngl": 29
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["1.5B", "Featured"],
"size": 1290000000
},
"engine": "llama-cpp"
}

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-14b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 14B Q4",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf",
"ngl": 49
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["14B", "Featured"],
"size": 8990000000
},
"engine": "llama-cpp"
}

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-32b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 32B Q4",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf",
"ngl": 65
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["32B", "Featured"],
"size": 19900000000
},
"engine": "llama-cpp"
}

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-7b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 7B Q5",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf",
"ngl": 29
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["7B", "Featured"],
"size": 5440000000
},
"engine": "llama-cpp"
}
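
These catalog entries are bundled into the extension at build time (see the rolldown change below), and their `parameters` block maps directly onto inference request fields. The snippet below is an illustrative sketch of that mapping; the request shape and message content are assumptions, not code from this commit.

```typescript
import deepseekR1DistillQwen_7b from './resources/models/deepseek-r1-distill-qwen-7b/model.json' with { type: 'json' }

// Illustrative only: translate one catalog entry into an OpenAI-style
// chat-completion payload (the payload field names are assumptions).
const payload = {
  model: deepseekR1DistillQwen_7b.id,                           // 'deepseek-r1-distill-qwen-7b'
  temperature: deepseekR1DistillQwen_7b.parameters.temperature, // 0.6
  top_p: deepseekR1DistillQwen_7b.parameters.top_p,             // 0.95
  stream: deepseekR1DistillQwen_7b.parameters.stream,           // true
  max_tokens: deepseekR1DistillQwen_7b.parameters.max_tokens,   // 131072
  messages: [{ role: 'user', content: 'Summarize this repo in one sentence.' }],
}
```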

View File

@ -22,19 +22,13 @@
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 8192, "max_tokens": 8192,
"stop": [ "stop": ["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
"<|end_of_text|>",
"<|eot_id|>",
"<|eom_id|>"
],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0
}, },
"metadata": { "metadata": {
"author": "MetaAI", "author": "MetaAI",
"tags": [ "tags": ["8B", "Featured"],
"8B", "Featured"
],
"size": 4920000000 "size": 4920000000
}, },
"engine": "llama-cpp" "engine": "llama-cpp"

View File

@ -49,6 +49,13 @@ import qwen2514bJson from './resources/models/qwen2.5-14b-instruct/model.json' w
import qwen2532bJson from './resources/models/qwen2.5-32b-instruct/model.json' with { type: 'json' } import qwen2532bJson from './resources/models/qwen2.5-32b-instruct/model.json' with { type: 'json' }
import qwen2572bJson from './resources/models/qwen2.5-72b-instruct/model.json' with { type: 'json' } import qwen2572bJson from './resources/models/qwen2.5-72b-instruct/model.json' with { type: 'json' }
import deepseekR1DistillQwen_1_5b from './resources/models/deepseek-r1-distill-qwen-1.5b/model.json' with { type: 'json' }
import deepseekR1DistillQwen_7b from './resources/models/deepseek-r1-distill-qwen-7b/model.json' with { type: 'json' }
import deepseekR1DistillQwen_14b from './resources/models/deepseek-r1-distill-qwen-14b/model.json' with { type: 'json' }
import deepseekR1DistillQwen_32b from './resources/models/deepseek-r1-distill-qwen-32b/model.json' with { type: 'json' }
import deepseekR1DistillLlama_8b from './resources/models/deepseek-r1-distill-llama-8b/model.json' with { type: 'json' }
import deepseekR1DistillLlama_70b from './resources/models/deepseek-r1-distill-llama-70b/model.json' with { type: 'json' }
export default defineConfig([ export default defineConfig([
{ {
input: 'src/index.ts', input: 'src/index.ts',
@ -106,6 +113,12 @@ export default defineConfig([
qwen2514bJson, qwen2514bJson,
qwen2532bJson, qwen2532bJson,
qwen2572bJson, qwen2572bJson,
deepseekR1DistillQwen_1_5b,
deepseekR1DistillQwen_7b,
deepseekR1DistillQwen_14b,
deepseekR1DistillQwen_32b,
deepseekR1DistillLlama_8b,
deepseekR1DistillLlama_70b,
]), ]),
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
SETTINGS: JSON.stringify(defaultSettingJson), SETTINGS: JSON.stringify(defaultSettingJson),

View File

@ -112,8 +112,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number
// Run the process watchdog // Run the process watchdog
const systemInfo = await systemInformation() // const systemInfo = await systemInformation()
this.queue.add(() => executeOnMain(NODE, 'run', systemInfo)) this.queue.add(() => executeOnMain(NODE, 'run'))
this.queue.add(() => this.healthz()) this.queue.add(() => this.healthz())
this.subscribeToEvents() this.subscribeToEvents()

View File

@ -16,15 +16,20 @@ let watchdog: ProcessWatchdog | undefined = undefined
* Spawns a Nitro subprocess. * Spawns a Nitro subprocess.
* @returns A promise that resolves when the Nitro subprocess is started. * @returns A promise that resolves when the Nitro subprocess is started.
*/ */
function run(systemInfo?: SystemInformation): Promise<any> { function run(): Promise<any> {
log(`[CORTEX]:: Spawning cortex subprocess...`) log(`[CORTEX]:: Spawning cortex subprocess...`)
return new Promise<void>(async (resolve, reject) => { return new Promise<void>(async (resolve, reject) => {
let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? '' // let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? ''
let binaryName = `cortex-server${process.platform === 'win32' ? '.exe' : ''}` let binaryName = `cortex-server${
process.platform === 'win32' ? '.exe' : ''
}`
const binPath = path.join(__dirname, '..', 'bin') const binPath = path.join(__dirname, '..', 'bin')
const executablePath = path.join(binPath, binaryName) const executablePath = path.join(binPath, binaryName)
addEnvPaths(binPath)
const sharedPath = path.join(appResourcePath(), 'shared') const sharedPath = path.join(appResourcePath(), 'shared')
// Execute the binary // Execute the binary
log(`[CORTEX]:: Spawn cortex at path: ${executablePath}`) log(`[CORTEX]:: Spawn cortex at path: ${executablePath}`)
@ -44,15 +49,17 @@ function run(systemInfo?: SystemInformation): Promise<any> {
`${path.join(dataFolderPath, '.janrc')}`, `${path.join(dataFolderPath, '.janrc')}`,
'--data_folder_path', '--data_folder_path',
dataFolderPath, dataFolderPath,
'--loglevel',
'INFO',
], ],
{ {
env: { env: {
...process.env, ...process.env,
CUDA_VISIBLE_DEVICES: gpuVisibleDevices, // CUDA_VISIBLE_DEVICES: gpuVisibleDevices,
// Vulkan - Support 1 device at a time for now // // Vulkan - Support 1 device at a time for now
...(gpuVisibleDevices?.length > 0 && { // ...(gpuVisibleDevices?.length > 0 && {
GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices, // GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices,
}), // }),
}, },
cwd: sharedPath, cwd: sharedPath,
} }
@ -71,6 +78,22 @@ function dispose() {
watchdog?.terminate() watchdog?.terminate()
} }
/**
* Set the environment paths for the cortex subprocess
* @param dest directory to append to PATH (Windows) or LD_LIBRARY_PATH (Linux/macOS)
*/
function addEnvPaths(dest: string) {
// Add engine path to the PATH and LD_LIBRARY_PATH
if (process.platform === 'win32') {
process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
} else {
process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
path.delimiter,
dest
)
}
}
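
A rough sketch of how `addEnvPaths` fits into the spawn sequence: the child process inherits the augmented `process.env`, so engine libraries shipped next to the binary resolve without a system-wide install. The paths below are illustrative assumptions, and the extension itself goes through a `ProcessWatchdog` rather than calling `spawn` directly, so treat this as a simplified stand-in.

```typescript
import { spawn } from 'child_process'
import path from 'path'

// Hypothetical locations, for illustration only.
const binPath = path.join(__dirname, '..', 'bin')
const executablePath = path.join(binPath, 'cortex-server')

addEnvPaths(binPath) // PATH (Windows) or LD_LIBRARY_PATH (elsewhere) now ends with binPath

// The spawned server inherits process.env, so libraries bundled in binPath
// are found by the dynamic loader at startup.
const child = spawn(executablePath, ['--loglevel', 'INFO'], {
  env: { ...process.env },
})
child.on('exit', (code) => console.log(`cortex-server exited with ${code}`))
```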
/** /**
* Cortex process info * Cortex process info
*/ */

View File

@ -15,8 +15,6 @@ import {
} from '@janhq/core' } from '@janhq/core'
import { CortexAPI } from './cortex' import { CortexAPI } from './cortex'
import { scanModelsFolder } from './legacy/model-json' import { scanModelsFolder } from './legacy/model-json'
import { downloadModel } from './legacy/download'
import { systemInformation } from '@janhq/core'
import { deleteModelFiles } from './legacy/delete' import { deleteModelFiles } from './legacy/delete'
export enum Settings { export enum Settings {
@ -71,18 +69,6 @@ export default class JanModelExtension extends ModelExtension {
* @returns A Promise that resolves when the model is downloaded. * @returns A Promise that resolves when the model is downloaded.
*/ */
async pullModel(model: string, id?: string, name?: string): Promise<void> { async pullModel(model: string, id?: string, name?: string): Promise<void> {
if (id) {
const model: Model = ModelManager.instance().get(id)
// Clip vision model - should not be handled by cortex.cpp
// TensorRT model - should not be handled by cortex.cpp
if (
model &&
(model.engine === InferenceEngine.nitro_tensorrt_llm ||
model.settings.vision_model)
) {
return downloadModel(model, (await systemInformation()).gpuSetting)
}
}
/** /**
* Sending POST to /models/pull/{id} endpoint to pull the model * Sending POST to /models/pull/{id} endpoint to pull the model
*/ */

View File

@ -2,15 +2,12 @@ import {
downloadFile, downloadFile,
DownloadRequest, DownloadRequest,
fs, fs,
GpuSetting,
InferenceEngine,
joinPath, joinPath,
Model, Model,
} from '@janhq/core' } from '@janhq/core'
export const downloadModel = async ( export const downloadModel = async (
model: Model, model: Model,
gpuSettings?: GpuSetting,
network?: { ignoreSSL?: boolean; proxy?: string } network?: { ignoreSSL?: boolean; proxy?: string }
): Promise<void> => { ): Promise<void> => {
const homedir = 'file://models' const homedir = 'file://models'
@ -27,41 +24,6 @@ export const downloadModel = async (
JSON.stringify(model, null, 2) JSON.stringify(model, null, 2)
) )
if (model.engine === InferenceEngine.nitro_tensorrt_llm) {
if (!gpuSettings || gpuSettings.gpus.length === 0) {
console.error('No GPU found. Please check your GPU setting.')
return
}
const firstGpu = gpuSettings.gpus[0]
if (!firstGpu.name.toLowerCase().includes('nvidia')) {
console.error('No Nvidia GPU found. Please check your GPU setting.')
return
}
const gpuArch = firstGpu.arch
if (gpuArch === undefined) {
console.error('No GPU architecture found. Please check your GPU setting.')
return
}
if (!supportedGpuArch.includes(gpuArch)) {
console.debug(
`Your GPU: ${JSON.stringify(firstGpu)} is not supported. Only 30xx, 40xx series are supported.`
)
return
}
const os = 'windows' // TODO: remove this hard coded value
const newSources = model.sources.map((source) => {
const newSource = { ...source }
newSource.url = newSource.url
.replace(/<os>/g, os)
.replace(/<gpuarch>/g, gpuArch)
return newSource
})
model.sources = newSources
}
console.debug(`Download sources: ${JSON.stringify(model.sources)}`) console.debug(`Download sources: ${JSON.stringify(model.sources)}`)
if (model.sources.length > 1) { if (model.sources.length > 1) {

View File

@ -1,75 +0,0 @@
# Create a Jan Extension using TypeScript
Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
## Create Your Own Extension
To create your own extension, you can use this repository as a template! Just follow the instructions below:
1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository
## Initial Setup
After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!
1. :hammer_and_wrench: Install the dependencies
```bash
npm install
```
1. :building_construction: Package the TypeScript for distribution
```bash
npm run bundle
```
1. :white_check_mark: Check your artifact
There will be a `.tgz` file in your extension directory now.
## Update the Extension Metadata
The [`package.json`](package.json) file defines metadata about your extension, such as
extension name, main entry, description and version.
When you copy this repository, update `package.json` with the name and description of your extension.
## Update the Extension Code
The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.
There are a few things to keep in mind when writing your extension code:
- Most Jan Extension functions are processed asynchronously.
In `index.ts`, you will see that the extension function will return a `Promise<any>`.
```typescript
import { events, MessageEvent, MessageRequest } from '@janhq/core'
function onStart(): Promise<any> {
return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.inference(data)
)
}
```
For more information about the Jan Extension Core module, see the
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).
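As a further illustration, a complete extension entry point typically pairs an `onLoad` hook with an `onUnload` cleanup. The sketch below assumes a `BaseExtension` export from `@janhq/core`; treat the class and method names as an example rather than the template's actual contents. The monitoring extension removed later in this diff follows the same pattern with a concrete `MonitoringExtension` base class.

```typescript
import { BaseExtension, events, MessageEvent, MessageRequest } from '@janhq/core'

// Minimal sketch of an extension entry point (assumed API surface).
export default class MyExtension extends BaseExtension {
  async onLoad() {
    // Subscribe to events when Jan loads the extension.
    events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
      this.handleMessage(data)
    )
  }

  onUnload(): void {
    // Tear down timers, subprocesses, or listeners here.
  }

  private async handleMessage(data: MessageRequest) {
    // Extension-specific work goes here.
  }
}
```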
So, what are you waiting for? Go ahead and start customizing your extension!

View File

@ -1,2 +0,0 @@
@echo off
.\node_modules\.bin\download https://catalog.jan.ai/vulkaninfoSDK.exe -o ./bin

View File

@ -1,49 +0,0 @@
{
"name": "@janhq/monitoring-extension",
"productName": "System Monitoring",
"version": "1.0.10",
"description": "Provides system health and OS level data.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"scripts": {
"build": "rolldown -c rolldown.config.mjs && yarn download-artifacts",
"download-artifacts": "run-script-os && cpx \"bin/**\" \"dist/bin\"",
"download-artifacts:darwin": "echo 'No artifacts to download for darwin'",
"download-artifacts:win32": "download.bat",
"download-artifacts:linux": "download https://catalog.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo",
"build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/node/index.cjs.js"
},
"devDependencies": {
"@types/node": "^20.11.4",
"@types/node-os-utils": "^1.3.4",
"cpx": "^1.5.0",
"download-cli": "^1.1.1",
"rimraf": "^3.0.2",
"rolldown": "1.0.0-beta.1",
"run-script-os": "^1.1.6",
"typescript": "^5.3.3"
},
"dependencies": {
"@janhq/core": "../../core/package.tgz",
"node-os-utils": "^1.3.7"
},
"files": [
"dist/*",
"package.json",
"README.md"
],
"bundleDependencies": [
"node-os-utils",
"@janhq/core"
],
"installConfig": {
"hoistingLimits": "workspaces"
},
"packageManager": "yarn@4.5.3"
}

View File

@ -1,22 +0,0 @@
[
{
"key": "log-enabled",
"title": "Enable App Logs",
"description": "Saves app logs locally on your computer. This enables you to send us crash reports.",
"controllerType": "checkbox",
"controllerProps": {
"value": true
}
},
{
"key": "log-cleaning-interval",
"title": "Log Cleaning Interval",
"description": "Automatically delete local logs after a certain time interval (in milliseconds).",
"controllerType": "input",
"controllerProps": {
"value": "120000",
"placeholder": "Interval in milliseconds. E.g. 120000",
"textAlign": "right"
}
}
]

View File

@ -1,32 +0,0 @@
import { defineConfig } from 'rolldown'
import packageJson from './package.json' with { type: 'json' }
import settingJson from './resources/settings.json' with { type: 'json' }
export default defineConfig([
{
input: 'src/index.ts',
output: {
format: 'esm',
file: 'dist/index.js',
},
platform: 'browser',
define: {
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
SETTINGS: JSON.stringify(settingJson),
},
},
{
input: 'src/node/index.ts',
external: ['@janhq/core/node'],
output: {
format: 'cjs',
file: 'dist/node/index.cjs.js',
sourcemap: false,
inlineDynamicImports: true,
},
resolve: {
extensions: ['.js', '.ts', '.json'],
},
platform: 'node',
},
])

View File

@ -1,19 +0,0 @@
declare const NODE: string
declare const SETTINGS: SettingComponentProps[]
type CpuGpuInfo = {
cpu: {
usage: number
}
gpu: GpuInfo[]
}
type GpuInfo = {
id: string
name: string
temperature: string
utilization: string
memoryTotal: string
memoryFree: string
memoryUtilization: string
}

View File

@ -1,90 +0,0 @@
import {
AppConfigurationEventName,
GpuSetting,
MonitoringExtension,
OperatingSystemInfo,
events,
executeOnMain,
} from '@janhq/core'
enum Settings {
logEnabled = 'log-enabled',
logCleaningInterval = 'log-cleaning-interval',
}
/**
* JanMonitoringExtension is an extension that provides system monitoring functionality.
* It implements the MonitoringExtension interface from the @janhq/core package.
*/
export default class JanMonitoringExtension extends MonitoringExtension {
/**
* Called when the extension is loaded.
*/
async onLoad() {
// Register extension settings
this.registerSettings(SETTINGS)
const logEnabled = await this.getSetting<boolean>(Settings.logEnabled, true)
const logCleaningInterval = parseInt(
await this.getSetting<string>(Settings.logCleaningInterval, '120000')
)
// Register File Logger provided by this extension
await executeOnMain(NODE, 'registerLogger', {
logEnabled,
logCleaningInterval: isNaN(logCleaningInterval)
? 120000
: logCleaningInterval,
})
// Attempt to fetch nvidia info
await executeOnMain(NODE, 'updateNvidiaInfo')
events.emit(AppConfigurationEventName.OnConfigurationUpdate, {})
}
onSettingUpdate<T>(key: string, value: T): void {
if (key === Settings.logEnabled) {
executeOnMain(NODE, 'updateLogger', { logEnabled: value })
} else if (key === Settings.logCleaningInterval) {
executeOnMain(NODE, 'updateLogger', { logCleaningInterval: value })
}
}
/**
* Called when the extension is unloaded.
*/
onUnload(): void {
// Unregister File Logger provided by this extension
executeOnMain(NODE, 'unregisterLogger')
}
/**
* Returns the GPU configuration.
* @returns A Promise that resolves to an object containing the GPU configuration.
*/
async getGpuSetting(): Promise<GpuSetting | undefined> {
return executeOnMain(NODE, 'getGpuConfig')
}
/**
* Returns information about the system resources.
* @returns A Promise that resolves to an object containing information about the system resources.
*/
getResourcesInfo(): Promise<any> {
return executeOnMain(NODE, 'getResourcesInfo')
}
/**
* Returns information about the current system load.
* @returns A Promise that resolves to an object containing information about the current system load.
*/
getCurrentLoad(): Promise<any> {
return executeOnMain(NODE, 'getCurrentLoad')
}
/**
* Returns information about the OS
* @returns
*/
getOsInfo(): Promise<OperatingSystemInfo> {
return executeOnMain(NODE, 'getOsInfo')
}
}

View File

@ -1,389 +0,0 @@
import {
GpuSetting,
GpuSettingInfo,
LoggerManager,
OperatingSystemInfo,
ResourceInfo,
SupportedPlatforms,
getJanDataFolderPath,
log,
} from '@janhq/core/node'
import { mem, cpu } from 'node-os-utils'
import { exec } from 'child_process'
import { writeFileSync, existsSync, readFileSync, mkdirSync } from 'fs'
import path from 'path'
import os from 'os'
import { FileLogger } from './logger'
/**
* Path to the settings directory
**/
export const SETTINGS_DIR = path.join(getJanDataFolderPath(), 'settings')
/**
* Path to the settings file
**/
export const GPU_INFO_FILE = path.join(SETTINGS_DIR, 'settings.json')
/**
* Default GPU settings
* TODO: This needs to be refactored to support multiple accelerators
**/
const DEFAULT_SETTINGS: GpuSetting = {
notify: true,
run_mode: 'cpu',
nvidia_driver: {
exist: false,
version: '',
},
cuda: {
exist: false,
version: '',
},
gpus: [],
gpu_highest_vram: '',
gpus_in_use: [],
is_initial: true,
// TODO: This needs to be set based on user toggle in settings
vulkan: false,
}
export const getGpuConfig = async (): Promise<GpuSetting | undefined> => {
if (process.platform === 'darwin') return undefined
if (existsSync(GPU_INFO_FILE))
return JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
return DEFAULT_SETTINGS
}
export const getResourcesInfo = async (): Promise<ResourceInfo> => {
const ramUsedInfo = await mem.used()
const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024
const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024
const resourceInfo: ResourceInfo = {
mem: {
totalMemory,
usedMemory,
},
}
return resourceInfo
}
export const getCurrentLoad = () =>
new Promise<CpuGpuInfo>(async (resolve, reject) => {
const cpuPercentage = await cpu.usage()
let data = {
run_mode: 'cpu',
gpus_in_use: [],
}
if (process.platform !== 'darwin') {
data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
}
if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
const gpuIds = data.gpus_in_use.join(',')
if (gpuIds !== '' && data['vulkan'] !== true) {
exec(
`nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
(error, stdout, _) => {
if (error) {
console.error(`exec error: ${error}`)
throw new Error(error.message)
}
const gpuInfo: GpuInfo[] = stdout
.trim()
.split('\n')
.map((line) => {
const [
id,
name,
temperature,
utilization,
memoryTotal,
memoryFree,
memoryUtilization,
] = line.split(', ').map((item) => item.replace(/\r/g, ''))
return {
id,
name,
temperature,
utilization,
memoryTotal,
memoryFree,
memoryUtilization,
}
})
resolve({
cpu: { usage: cpuPercentage },
gpu: gpuInfo,
})
}
)
} else {
// Handle the case where gpuIds is empty
resolve({
cpu: { usage: cpuPercentage },
gpu: [],
})
}
} else {
// Handle the case where run_mode is not 'gpu' or no GPUs are in use
resolve({
cpu: { usage: cpuPercentage },
gpu: [],
})
}
})
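
For reference, the `nvidia-smi` query above emits one comma-separated line per GPU, with fields in the order the `--query-gpu` flags are listed. The sample value below is invented for illustration and simply re-runs the same parsing step.

```typescript
// Illustrative output line (values made up); field order follows
// index, name, temperature.gpu, utilization.gpu, memory.total,
// memory.free, utilization.memory (with nounits).
const sampleLine = '0, NVIDIA GeForce RTX 4090, 52, 17, 24564, 20210, 12'

const [id, name, temperature, utilization, memoryTotal, memoryFree, memoryUtilization] =
  sampleLine.split(', ').map((item) => item.replace(/\r/g, ''))

console.log({ id, name, memoryTotal, memoryFree })
// { id: '0', name: 'NVIDIA GeForce RTX 4090', memoryTotal: '24564', memoryFree: '20210' }
```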
/**
* This will retrieve GPU information and persist settings.json
* Will be called when the extension is loaded to turn on GPU acceleration if supported
*/
export const updateNvidiaInfo = async () => {
// ignore if macos
if (process.platform === 'darwin') return
try {
JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
} catch (error) {
if (!existsSync(SETTINGS_DIR)) {
mkdirSync(SETTINGS_DIR, {
recursive: true,
})
}
writeFileSync(GPU_INFO_FILE, JSON.stringify(DEFAULT_SETTINGS, null, 2))
}
await updateNvidiaDriverInfo()
await updateGpuInfo()
}
const updateNvidiaDriverInfo = async () =>
new Promise((resolve, reject) => {
exec(
'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
(error, stdout) => {
const data: GpuSetting = JSON.parse(
readFileSync(GPU_INFO_FILE, 'utf-8')
)
if (!error) {
const firstLine = stdout.split('\n')[0].trim()
data.nvidia_driver.exist = true
data.nvidia_driver.version = firstLine
} else {
data.nvidia_driver.exist = false
}
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
resolve({})
}
)
})
const getGpuArch = (gpuName: string): string => {
if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown'
if (gpuName.includes('30')) return 'ampere'
else if (gpuName.includes('40')) return 'ada'
else return 'unknown'
}
const updateGpuInfo = async () =>
new Promise((resolve, reject) => {
let data: GpuSetting = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
// Cuda
if (data.vulkan === true) {
// Vulkan
exec(
process.platform === 'win32'
? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
: `${__dirname}/../bin/vulkaninfo --summary`,
async (error, stdout) => {
if (!error) {
const output = stdout.toString()
log(output)
const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g
const gpus: GpuSettingInfo[] = []
let match
while ((match = gpuRegex.exec(output)) !== null) {
const id = match[1]
const name = match[2]
const arch = getGpuArch(name)
gpus.push({ id, vram: '0', name, arch })
}
data.gpus = gpus
if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
}
data = await updateCudaExistence(data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
log(`[APP]::${JSON.stringify(data)}`)
resolve({})
} else {
reject(error)
}
}
)
} else {
exec(
'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
async (error, stdout) => {
if (!error) {
log(`[SPECS]::${stdout}`)
// Get GPU info and track the GPU with the highest VRAM
let highestVram = 0
let highestVramId = '0'
const gpus: GpuSettingInfo[] = stdout
.trim()
.split('\n')
.map((line) => {
let [id, vram, name] = line.split(', ')
const arch = getGpuArch(name)
vram = vram.replace(/\r/g, '')
if (parseFloat(vram) > highestVram) {
highestVram = parseFloat(vram)
highestVramId = id
}
return { id, vram, name, arch }
})
data.gpus = gpus
data.gpu_highest_vram = highestVramId
} else {
data.gpus = []
data.gpu_highest_vram = undefined
}
if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
data.gpus_in_use = data.gpu_highest_vram ? [data.gpu_highest_vram].filter(e => !!e) : []
}
data = await updateCudaExistence(data)
console.log('[MONITORING]::Cuda info: ', data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
log(`[APP]::${JSON.stringify(data)}`)
resolve({})
}
)
}
})
/**
* Check if file exists in paths
*/
const checkFileExistenceInPaths = (file: string, paths: string[]): boolean => {
return paths.some((p) => existsSync(path.join(p, file)))
}
/**
* Validate cuda for linux and windows
*/
const updateCudaExistence = async (
data: GpuSetting = DEFAULT_SETTINGS
): Promise<GpuSetting> => {
let filesCuda12: string[]
let filesCuda11: string[]
let paths: string[]
let cudaVersion: string = ''
if (process.platform === 'win32') {
filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']
filesCuda11 = ['cublas64_11.dll', 'cudart64_110.dll', 'cublasLt64_11.dll']
paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []
} else {
filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']
filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']
paths = process.env.LD_LIBRARY_PATH
? process.env.LD_LIBRARY_PATH.split(path.delimiter)
: []
paths.push('/usr/lib/x86_64-linux-gnu/')
}
let cudaExists = filesCuda12.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
)
if (!cudaExists) {
cudaExists = filesCuda11.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
)
if (cudaExists) {
cudaVersion = '11'
}
} else {
cudaVersion = '12'
}
data.cuda.exist = cudaExists
data.cuda.version = cudaVersion
console.debug(data.is_initial, data.gpus_in_use)
if (cudaExists && data.is_initial && data.gpus_in_use.length > 0) {
data.run_mode = 'gpu'
}
data.is_initial = false
// Attempt to query CUDA using NVIDIA SMI
if (!cudaExists) {
await new Promise<void>((resolve) => {
exec('nvidia-smi', (error, stdout) => {
if (!error) {
const regex = /CUDA\s*Version:\s*(\d+\.\d+)/g
const match = regex.exec(stdout)
if (match && match[1]) {
data.cuda.version = match[1]
}
}
console.log('[MONITORING]::Finalized cuda info update: ', data)
resolve()
})
})
}
return data
}
export const getOsInfo = (): OperatingSystemInfo => {
const platform =
SupportedPlatforms.find((p) => p === process.platform) || 'unknown'
const osInfo: OperatingSystemInfo = {
platform: platform,
arch: process.arch,
release: os.release(),
machine: os.machine(),
version: os.version(),
totalMem: os.totalmem(),
freeMem: os.freemem(),
}
return osInfo
}
export const registerLogger = ({ logEnabled, logCleaningInterval }) => {
const logger = new FileLogger(logEnabled, logCleaningInterval)
LoggerManager.instance().register(logger)
logger.cleanLogs()
}
export const unregisterLogger = () => {
LoggerManager.instance().unregister('file')
}
export const updateLogger = ({ logEnabled, logCleaningInterval }) => {
const logger = LoggerManager.instance().loggers.get('file') as FileLogger
if (logger && logEnabled !== undefined) logger.logEnabled = logEnabled
if (logger && logCleaningInterval)
logger.logCleaningInterval = logCleaningInterval
// Rerun
logger && logger.cleanLogs()
}

View File

@ -5,77 +5,470 @@
"post": { "post": {
"operationId": "AssistantsController_create", "operationId": "AssistantsController_create",
"summary": "Create assistant", "summary": "Create assistant",
"description": "Creates a new assistant.", "description": "Creates a new assistant with the specified configuration.",
"parameters": [],
"requestBody": { "requestBody": {
"required": true, "required": true,
"content": { "content": {
"application/json": { "application/json": {
"schema": { "schema": {
"$ref": "#/components/schemas/CreateAssistantDto" "type": "object",
"properties": {
"model": {
"type": "string",
"description": "The model identifier to use for the assistant."
},
"name": {
"type": "string",
"description": "The name of the assistant."
},
"description": {
"type": "string",
"description": "The description of the assistant."
},
"instructions": {
"type": "string",
"description": "Instructions for the assistant's behavior."
},
"tools": {
"type": "array",
"description": "A list of tools enabled on the assistant. Maximum of 128 tools.",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"code_interpreter",
"file_search",
"function"
]
} }
} }
} }
}, },
"responses": { "tool_resources": {
"201": { "type": "object",
"description": "The assistant has been successfully created." "description": "Resources used by the assistant's tools.",
"properties": {
"code_interpreter": {
"type": "object"
},
"file_search": {
"type": "object"
}
} }
}, },
"tags": ["Assistants"] "metadata": {
"type": "object",
"description": "Set of key-value pairs for the assistant.",
"additionalProperties": true
}, },
"get": { "temperature": {
"operationId": "AssistantsController_findAll", "type": "number",
"summary": "List assistants", "format": "float",
"description": "Returns a list of assistants.", "description": "Temperature parameter for response generation."
"parameters": [ },
"top_p": {
"type": "number",
"format": "float",
"description": "Top p parameter for response generation."
},
"response_format": {
"oneOf": [
{ {
"name": "limit", "type": "string",
"required": false, "enum": ["auto"]
"in": "query",
"description": "A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.",
"schema": {
"type": "number"
}
}, },
{ {
"name": "order", "type": "object"
"required": false, }
"in": "query", ]
"description": "Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order.",
"schema": {
"type": "string"
} }
}, },
{ "required": ["model"]
"name": "after", }
"required": false, }
"in": "query",
"description": "A cursor for use in pagination. after is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.",
"schema": {
"type": "string"
} }
}, },
{
"name": "before",
"required": false,
"in": "query",
"description": "A cursor for use in pagination. before is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list.",
"schema": {
"type": "string"
}
}
],
"responses": { "responses": {
"200": { "200": {
"description": "Ok", "description": "Ok",
"content": { "content": {
"application/json": { "application/json": {
"schema": { "schema": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "The unique identifier of the assistant."
},
"object": {
"type": "string",
"enum": ["assistant"],
"description": "The object type, which is always 'assistant'."
},
"created_at": {
"type": "integer",
"description": "Unix timestamp (in seconds) of when the assistant was created."
},
"model": {
"type": "string",
"description": "The model identifier used by the assistant."
},
"name": {
"type": "string",
"description": "The name of the assistant."
},
"description": {
"type": "string",
"description": "The description of the assistant."
},
"instructions": {
"type": "string",
"description": "Instructions for the assistant's behavior."
},
"tools": {
"type": "array",
"description": "A list of tools enabled on the assistant.",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"code_interpreter",
"file_search",
"function"
]
}
}
}
},
"tool_resources": {
"type": "object",
"description": "Resources used by the assistant's tools.",
"properties": {
"code_interpreter": {
"type": "object"
},
"file_search": {
"type": "object"
}
}
},
"metadata": {
"type": "object",
"description": "Set of key-value pairs that can be attached to the assistant.",
"additionalProperties": true
},
"temperature": {
"type": "number",
"format": "float",
"description": "Temperature parameter for response generation."
},
"top_p": {
"type": "number",
"format": "float",
"description": "Top p parameter for response generation."
},
"response_format": {
"oneOf": [
{
"type": "string",
"enum": ["auto"]
},
{
"type": "object"
}
]
}
},
"required": [
"id",
"object",
"created_at",
"model",
"metadata"
]
}
}
}
}
},
"tags": ["Assistants"]
},
"patch": {
"operationId": "AssistantsController_update",
"summary": "Update assistant",
"description": "Updates an assistant. Requires at least one modifiable field.",
"parameters": [
{
"name": "id",
"required": true,
"in": "path",
"description": "The unique identifier of the assistant.",
"schema": {
"type": "string"
}
},
{
"name": "OpenAI-Beta",
"required": true,
"in": "header",
"description": "Beta feature header.",
"schema": {
"type": "string",
"enum": ["assistants=v2"]
}
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"model": {
"type": "string",
"description": "The model identifier to use for the assistant."
},
"name": {
"type": "string",
"description": "The name of the assistant."
},
"description": {
"type": "string",
"description": "The description of the assistant."
},
"instructions": {
"type": "string",
"description": "Instructions for the assistant's behavior."
},
"tools": {
"type": "array",
"description": "A list of tools enabled on the assistant. Maximum of 128 tools.",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"code_interpreter",
"file_search",
"function"
]
}
}
}
},
"tool_resources": {
"type": "object",
"description": "Resources used by the assistant's tools.",
"properties": {
"code_interpreter": {
"type": "object"
},
"file_search": {
"type": "object"
}
}
},
"metadata": {
"type": "object",
"description": "Set of key-value pairs for the assistant.",
"additionalProperties": true
},
"temperature": {
"type": "number",
"format": "float",
"description": "Temperature parameter for response generation."
},
"top_p": {
"type": "number",
"format": "float",
"description": "Top p parameter for response generation."
},
"response_format": {
"oneOf": [
{
"type": "string",
"enum": ["auto"]
},
{
"type": "object"
}
]
}
},
"minProperties": 1
}
}
}
},
"responses": {
"200": {
"description": "Ok",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "The unique identifier of the assistant."
},
"object": {
"type": "string",
"enum": ["assistant"],
"description": "The object type, which is always 'assistant'."
},
"created_at": {
"type": "integer",
"description": "Unix timestamp (in seconds) of when the assistant was created."
},
"model": {
"type": "string",
"description": "The model identifier used by the assistant."
},
"name": {
"type": "string",
"description": "The name of the assistant."
},
"description": {
"type": "string",
"description": "The description of the assistant."
},
"instructions": {
"type": "string",
"description": "Instructions for the assistant's behavior."
},
"tools": {
"type": "array",
"description": "A list of tools enabled on the assistant.",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"code_interpreter",
"file_search",
"function"
]
}
}
}
},
"tool_resources": {
"type": "object",
"description": "Resources used by the assistant's tools.",
"properties": {
"code_interpreter": {
"type": "object"
},
"file_search": {
"type": "object"
}
}
},
"metadata": {
"type": "object",
"description": "Set of key-value pairs that can be attached to the assistant.",
"additionalProperties": true
},
"temperature": {
"type": "number",
"format": "float",
"description": "Temperature parameter for response generation."
},
"top_p": {
"type": "number",
"format": "float",
"description": "Top p parameter for response generation."
},
"response_format": {
"oneOf": [
{
"type": "string",
"enum": ["auto"]
},
{
"type": "object"
}
]
}
},
"required": [
"id",
"object",
"created_at",
"model",
"metadata"
]
}
}
}
}
},
"tags": ["Assistants"]
},
"get": {
"operationId": "AssistantsController_list",
"summary": "List assistants",
"description": "Returns a list of assistants.",
"responses": {
"200": {
"description": "Ok",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"object": {
"type": "string",
"enum": ["list"],
"description": "The object type, which is always 'list' for a list response."
},
"data": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/AssistantEntity" "type": "object",
"properties": {
"id": {
"type": "string",
"description": "The unique identifier of the assistant."
},
"object": {
"type": "string",
"enum": ["assistant"],
"description": "The object type, which is always 'assistant'."
},
"created_at": {
"type": "integer",
"description": "Unix timestamp (in seconds) of when the assistant was created."
},
"model": {
"type": "string",
"description": "The model identifier used by the assistant."
},
"metadata": {
"type": "object",
"description": "Set of key-value pairs that can be attached to the assistant.",
"additionalProperties": true
} }
},
"required": [
"id",
"object",
"created_at",
"model",
"metadata"
]
}
}
},
"required": ["object", "data"]
} }
} }
} }
@ -88,7 +481,77 @@
"get": { "get": {
"operationId": "AssistantsController_findOne", "operationId": "AssistantsController_findOne",
"summary": "Get assistant", "summary": "Get assistant",
"description": "Retrieves a specific assistant defined by an assistant's `id`.", "description": "Retrieves a specific assistant by ID.",
"parameters": [
{
"name": "id",
"required": true,
"in": "path",
"description": "The unique identifier of the assistant.",
"schema": {
"type": "string"
}
},
{
"name": "OpenAI-Beta",
"required": true,
"in": "header",
"description": "Beta feature header.",
"schema": {
"type": "string",
"enum": ["assistants=v2"]
}
}
],
"responses": {
"200": {
"description": "Ok",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "The unique identifier of the assistant."
},
"object": {
"type": "string",
"enum": ["assistant"],
"description": "The object type, which is always 'assistant'."
},
"created_at": {
"type": "integer",
"description": "Unix timestamp (in seconds) of when the assistant was created."
},
"model": {
"type": "string",
"description": "The model identifier used by the assistant."
},
"metadata": {
"type": "object",
"description": "Set of key-value pairs attached to the assistant.",
"additionalProperties": true
}
},
"required": [
"id",
"object",
"created_at",
"model",
"metadata"
]
}
}
}
}
},
"tags": ["Assistants"]
},
"delete": {
"operationId": "AssistantsController_remove",
"summary": "Delete assistant",
"description": "Deletes a specific assistant by ID.",
"parameters": [ "parameters": [
{ {
"name": "id", "name": "id",
@ -106,36 +569,24 @@
"content": { "content": {
"application/json": { "application/json": {
"schema": { "schema": {
"$ref": "#/components/schemas/AssistantEntity" "type": "object",
} "properties": {
} "id": {
} "type": "string",
"description": "The unique identifier of the deleted assistant."
},
"object": {
"type": "string",
"enum": ["assistant.deleted"],
"description": "The object type for a deleted assistant."
},
"deleted": {
"type": "boolean",
"enum": [true],
"description": "Indicates the assistant was successfully deleted."
} }
}, },
"tags": ["Assistants"] "required": ["id", "object", "deleted"]
},
"delete": {
"operationId": "AssistantsController_remove",
"summary": "Delete assistant",
"description": "Deletes a specific assistant defined by an assistant's `id`.",
"parameters": [
{
"name": "id",
"required": true,
"in": "path",
"description": "The unique identifier of the assistant.",
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"description": "The assistant has been successfully deleted.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/DeleteAssistantResponseDto"
} }
} }
} }
@ -2199,6 +2650,84 @@
"tags": ["Engines"] "tags": ["Engines"]
} }
}, },
"/engines/{name}/releases/{version}": {
"get": {
"summary": "List variants for a specific engine version",
"description": "Lists all available variants (builds) for a specific version of an engine. Variants can include different CPU architectures (AVX, AVX2, AVX512), GPU support (CUDA, Vulkan), and operating systems (Windows, Linux, macOS).",
"parameters": [
{
"name": "name",
"in": "path",
"required": true,
"schema": {
"type": "string",
"enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"],
"default": "llama-cpp"
},
"description": "The type of engine"
},
{
"name": "version",
"in": "path",
"required": true,
"schema": {
"type": "string"
},
"description": "The version of the engine"
},
{
"name": "show",
"in": "query",
"required": false,
"schema": {
"type": "string",
"enum": ["all", "compatible"],
"default": "all"
},
"description": "Filter the variants list. Use 'compatible' to show only variants compatible with the current system, or 'all' to show all available variants."
}
],
"responses": {
"200": {
"description": "Successfully retrieved variants list",
"content": {
"application/json": {
"schema": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The name of the variant, including OS, architecture, and capabilities",
"example": "linux-amd64-avx-cuda-11-7"
},
"created_at": {
"type": "string",
"format": "date-time",
"description": "Creation timestamp of the variant",
"example": "2024-11-13T04:51:16Z"
},
"size": {
"type": "integer",
"description": "Size of the variant in bytes",
"example": 151224604
},
"download_count": {
"type": "integer",
"description": "Number of times this variant has been downloaded",
"example": 0
}
}
}
}
}
}
}
},
"tags": ["Engines"]
}
},
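The `/engines/{name}/releases/{version}` route documented above is a plain GET, so it can be exercised with any HTTP client. The sketch below is hypothetical: the base URL, `/v1` prefix, and version string are assumptions, not values taken from this spec (it assumes an ES module context for top-level await).

```typescript
// Hypothetical request against a locally running server (base URL assumed).
const baseUrl = 'http://127.0.0.1:39281/v1'

const res = await fetch(
  `${baseUrl}/engines/llama-cpp/releases/v0.1.40?show=compatible`
)
const variants: Array<{
  name: string
  created_at: string
  size: number
  download_count: number
}> = await res.json()

console.log(variants.map((v) => v.name)) // e.g. ['linux-amd64-avx2-cuda-12-0']
```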
"/engines/{name}/releases/latest": { "/engines/{name}/releases/latest": {
"get": { "get": {
"summary": "Get latest release", "summary": "Get latest release",
@ -2314,7 +2843,7 @@
"get_models_url": { "get_models_url": {
"type": "string", "type": "string",
"description": "The URL to get models", "description": "The URL to get models",
"example": "https://api.openai.com/v1/models" "example": "https://api.openai.com/models"
} }
} }
} }
@ -3378,6 +3907,7 @@
"Files", "Files",
"Hardware", "Hardware",
"Events", "Events",
"Assistants",
"Threads", "Threads",
"Messages", "Messages",
"Pulling Models", "Pulling Models",
@ -4858,8 +5388,8 @@
"engine", "engine",
"version", "version",
"inference_params", "inference_params",
"TransformReq", "transform_req",
"TransformResp", "transform_resp",
"metadata" "metadata"
], ],
"properties": { "properties": {
@ -4867,9 +5397,9 @@
"type": "string", "type": "string",
"description": "The identifier of the model." "description": "The identifier of the model."
}, },
"api_key_template": { "header_template": {
"type": "string", "type": "string",
"description": "Template for the API key header." "description": "Template for the header."
}, },
"engine": { "engine": {
"type": "string", "type": "string",
@ -4902,7 +5432,7 @@
} }
} }
}, },
"TransformReq": { "transform_req": {
"type": "object", "type": "object",
"properties": { "properties": {
"get_models": { "get_models": {
@ -4924,7 +5454,7 @@
} }
} }
}, },
"TransformResp": { "transform_resp": {
"type": "object", "type": "object",
"properties": { "properties": {
"chat_completions": { "chat_completions": {
@ -5632,9 +6162,9 @@
"description": "Number of GPU layers.", "description": "Number of GPU layers.",
"example": 33 "example": 33
}, },
"api_key_template": { "header_template": {
"type": "string", "type": "string",
"description": "Template for the API key header." "description": "Template for the header."
}, },
"version": { "version": {
"type": "string", "type": "string",

View File

@ -10,7 +10,9 @@ const AutoLink = ({ text }: Props) => {
return ( return (
<> <>
{text.split(delimiter).map((word) => { {text &&
typeof text === 'string' &&
text.split(delimiter).map((word) => {
const match = word.match(delimiter) const match = word.match(delimiter)
if (match) { if (match) {
const url = match[0] const url = match[0]

View File

@ -23,7 +23,13 @@ import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom' import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom'
import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom' import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom'
const ErrorMessage = ({ message }: { message: ThreadMessage }) => { const ErrorMessage = ({
message,
errorComponent,
}: {
message?: ThreadMessage
errorComponent?: React.ReactNode
}) => {
const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom) const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom)
const setMainState = useSetAtom(mainViewStateAtom) const setMainState = useSetAtom(mainViewStateAtom)
const setSelectedSettingScreen = useSetAtom(selectedSettingAtom) const setSelectedSettingScreen = useSetAtom(selectedSettingAtom)
@ -50,7 +56,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
const getErrorTitle = () => { const getErrorTitle = () => {
const engine = getEngine() const engine = getEngine()
switch (message.metadata?.error_code) { switch (message?.metadata?.error_code) {
case ErrorCode.InvalidApiKey: case ErrorCode.InvalidApiKey:
case ErrorCode.AuthenticationError: case ErrorCode.AuthenticationError:
return ( return (
@ -61,7 +67,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
className="font-medium text-[hsla(var(--app-link))] underline" className="font-medium text-[hsla(var(--app-link))] underline"
onClick={() => { onClick={() => {
setMainState(MainViewState.Settings) setMainState(MainViewState.Settings)
engine?.name && setSelectedSettingScreen(engine.name) setSelectedSettingScreen(activeAssistant?.model?.engine ?? '')
}} }}
> >
Settings Settings
@ -77,7 +83,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
data-testid="passthrough-error-message" data-testid="passthrough-error-message"
className="first-letter:uppercase" className="first-letter:uppercase"
> >
{message.content[0]?.text?.value === 'Failed to fetch' && {message?.content[0]?.text?.value === 'Failed to fetch' &&
engine && engine &&
engine?.name !== InferenceEngine.cortex_llamacpp ? ( engine?.name !== InferenceEngine.cortex_llamacpp ? (
<span> <span>
@ -89,6 +95,9 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
{message?.content[0]?.text?.value && ( {message?.content[0]?.text?.value && (
<AutoLink text={message?.content[0]?.text?.value} /> <AutoLink text={message?.content[0]?.text?.value} />
)} )}
{!message?.content[0]?.text?.value && (
<span>Something went wrong. Please try again.</span>
)}
</> </>
)} )}
</p> </p>
@ -100,12 +109,15 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
<div className="mx-auto my-6 max-w-[700px] px-4"> <div className="mx-auto my-6 max-w-[700px] px-4">
<div <div
className="mx-auto max-w-[400px] rounded-lg border border-[hsla(var(--app-border))]" className="mx-auto max-w-[400px] rounded-lg border border-[hsla(var(--app-border))]"
key={message.id} key={message?.id}
> >
<div className="flex justify-between border-b border-inherit px-4 py-2"> <div className="flex justify-between border-b border-inherit px-4 py-2">
<h6 className="text-[hsla(var(--destructive-bg))]">Error</h6> <h6 className="flex items-center gap-x-1 font-semibold text-[hsla(var(--destructive-bg))]">
<div className="flex gap-x-4 text-xs"> <span className="h-2 w-2 rounded-full bg-[hsla(var(--destructive-bg))]" />
<div> <span>Error</span>
</h6>
<div className="flex items-center gap-x-4 text-xs">
<div className="font-semibold">
<span <span
className="flex cursor-pointer items-center gap-x-1 text-[hsla(var(--app-link))]" className="flex cursor-pointer items-center gap-x-1 text-[hsla(var(--app-link))]"
onClick={() => setModalTroubleShooting(true)} onClick={() => setModalTroubleShooting(true)}
@ -116,7 +128,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
<ModalTroubleShooting /> <ModalTroubleShooting />
</div> </div>
<div <div
className="flex cursor-pointer items-center gap-x-1 text-[hsla(var(--text-secondary))]" className="flex cursor-pointer items-center gap-x-1 font-semibold text-[hsla(var(--text-secondary))]"
onClick={handleCopy} onClick={handleCopy}
> >
{copied ? ( {copied ? (
@ -138,10 +150,10 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
</div> </div>
<div className="max-h-[80px] w-full overflow-x-auto p-4 py-2"> <div className="max-h-[80px] w-full overflow-x-auto p-4 py-2">
<div <div
className="text-xs leading-relaxed text-[hsla(var(--text-secondary))]" className="font-serif text-xs leading-relaxed text-[hsla(var(--text-secondary))]"
ref={errorDivRef} ref={errorDivRef}
> >
{getErrorTitle()} {errorComponent ? errorComponent : getErrorTitle()}
</div> </div>
</div> </div>
</div> </div>

View File

@ -87,7 +87,7 @@ describe('SystemMonitor', () => {
expect(screen.getByText('Running Models')).toBeInTheDocument() expect(screen.getByText('Running Models')).toBeInTheDocument()
expect(screen.getByText('App Log')).toBeInTheDocument() expect(screen.getByText('App Log')).toBeInTheDocument()
expect(screen.getByText('7.45/14.90 GB')).toBeInTheDocument() expect(screen.getByText('7.45GB / 14.90GB')).toBeInTheDocument()
expect(screen.getByText('30%')).toBeInTheDocument() expect(screen.getByText('30%')).toBeInTheDocument()
}) })

View File

@ -134,8 +134,8 @@ const SystemMonitor = () => {
<div className="flex items-center justify-between gap-2"> <div className="flex items-center justify-between gap-2">
<h6 className="font-bold">Memory</h6> <h6 className="font-bold">Memory</h6>
<span> <span>
{toGigabytes(usedRam, { hideUnit: true })}/ {toGigabytes(usedRam, { hideUnit: true })}GB /{' '}
{toGigabytes(totalRam, { hideUnit: true })} GB {toGigabytes(totalRam, { hideUnit: true })}GB
</span> </span>
</div> </div>
<div className="flex items-center gap-x-4"> <div className="flex items-center gap-x-4">
@ -149,10 +149,12 @@ const SystemMonitor = () => {
</div> </div>
{gpus.length > 0 && ( {gpus.length > 0 && (
<div className="mb-4 border-b border-[hsla(var(--app-border))] pb-4 last:border-none"> <div className="mb-4 border-b border-[hsla(var(--app-border))] pb-4 last:border-none">
{gpus.map((gpu, index) => { {gpus
.filter((gpu) => gpu.activated === true)
.map((gpu, index) => {
const gpuUtilization = utilizedMemory( const gpuUtilization = utilizedMemory(
gpu.memoryFree, gpu.free_vram,
gpu.memoryTotal gpu.total_vram
) )
return ( return (
<div key={index} className="mt-4 flex flex-col gap-x-2"> <div key={index} className="mt-4 flex flex-col gap-x-2">
@ -163,8 +165,8 @@ const SystemMonitor = () => {
<div className="flex gap-x-2"> <div className="flex gap-x-2">
<div className=""> <div className="">
<span> <span>
{gpu.memoryTotal - gpu.memoryFree}/ {gpu.total_vram - gpu.free_vram}/
{gpu.memoryTotal} {gpu.total_vram}
</span> </span>
<span> MB</span> <span> MB</span>
</div> </div>

View File

@ -25,6 +25,8 @@ import ImportModelOptionModal from '@/screens/Settings/ImportModelOptionModal'
import ImportingModelModal from '@/screens/Settings/ImportingModelModal' import ImportingModelModal from '@/screens/Settings/ImportingModelModal'
import SelectingModelModal from '@/screens/Settings/SelectingModelModal' import SelectingModelModal from '@/screens/Settings/SelectingModelModal'
import { getAppDistinctId, updateDistinctId } from '@/utils/settings'
import LoadingModal from '../LoadingModal' import LoadingModal from '../LoadingModal'
import MainViewContainer from '../MainViewContainer' import MainViewContainer from '../MainViewContainer'
@ -96,8 +98,16 @@ const BaseLayout = () => {
return properties return properties
}, },
}) })
// Attempt to restore distinct Id from app global settings
getAppDistinctId()
.then((id) => {
if (id) posthog.identify(id)
})
.finally(() => {
posthog.opt_in_capturing() posthog.opt_in_capturing()
posthog.register({ app_version: VERSION }) posthog.register({ app_version: VERSION })
updateDistinctId(posthog.get_distinct_id())
})
} else { } else {
posthog.opt_out_capturing() posthog.opt_out_capturing()
} }

View File

@ -28,6 +28,8 @@ import ModelLabel from '@/containers/ModelLabel'
import SetupRemoteModel from '@/containers/SetupRemoteModel' import SetupRemoteModel from '@/containers/SetupRemoteModel'
import { useActiveModel } from '@/hooks/useActiveModel'
import { useCreateNewThread } from '@/hooks/useCreateNewThread' import { useCreateNewThread } from '@/hooks/useCreateNewThread'
import useDownloadModel from '@/hooks/useDownloadModel' import useDownloadModel from '@/hooks/useDownloadModel'
import { modelDownloadStateAtom } from '@/hooks/useDownloadState' import { modelDownloadStateAtom } from '@/hooks/useDownloadState'
@ -40,7 +42,7 @@ import useUpdateModelParameters from '@/hooks/useUpdateModelParameters'
import { formatDownloadPercentage, toGigabytes } from '@/utils/converter' import { formatDownloadPercentage, toGigabytes } from '@/utils/converter'
import { manualRecommendationModel } from '@/utils/model' import { manualRecommendationModel } from '@/utils/model'
import { getLogoEngine } from '@/utils/modelEngine' import { getLogoEngine, getTitleByEngine } from '@/utils/modelEngine'
import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom' import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom'
import { import {
@ -93,6 +95,7 @@ const ModelDropdown = ({
const { updateModelParameter } = useUpdateModelParameters() const { updateModelParameter } = useUpdateModelParameters()
const searchInputRef = useRef<HTMLInputElement>(null) const searchInputRef = useRef<HTMLInputElement>(null)
const configuredModels = useAtomValue(configuredModelsAtom) const configuredModels = useAtomValue(configuredModelsAtom)
const { stopModel } = useActiveModel()
const featuredModels = configuredModels.filter( const featuredModels = configuredModels.filter(
(x) => (x) =>
@ -226,6 +229,7 @@ const ModelDropdown = ({
const model = downloadedModels.find((m) => m.id === modelId) const model = downloadedModels.find((m) => m.id === modelId)
setSelectedModel(model) setSelectedModel(model)
setOpen(false) setOpen(false)
stopModel()
if (activeThread) { if (activeThread) {
// Change assistant tools based on whether the model supports RAG // Change assistant tools based on whether the model supports RAG
@ -248,18 +252,13 @@ const ModelDropdown = ({
], ],
}) })
const defaultContextLength = Math.min( const contextLength = model?.settings.ctx_len
8192, ? Math.min(8192, model?.settings.ctx_len ?? 8192)
model?.settings.ctx_len ?? 8192 : undefined
)
const overriddenParameters = { const overriddenParameters = {
ctx_len: model?.settings.ctx_len ? defaultContextLength : undefined, ctx_len: contextLength,
max_tokens: defaultContextLength max_tokens: contextLength
? Math.min( ? Math.min(model?.parameters.max_tokens ?? 8192, contextLength)
model?.parameters.max_tokens ?? 8192,
defaultContextLength
)
: model?.parameters.max_tokens, : model?.parameters.max_tokens,
} }
@ -289,6 +288,7 @@ const ModelDropdown = ({
updateThreadMetadata, updateThreadMetadata,
setThreadModelParams, setThreadModelParams,
updateModelParameter, updateModelParameter,
stopModel,
] ]
) )
@ -429,7 +429,7 @@ const ModelDropdown = ({
/> />
)} )}
<h6 className="font-medium capitalize text-[hsla(var(--text-secondary))]"> <h6 className="font-medium capitalize text-[hsla(var(--text-secondary))]">
{engine.name} {getTitleByEngine(engine.name)}
</h6> </h6>
</div> </div>
<div className="-mr-2 flex gap-1"> <div className="-mr-2 flex gap-1">
@ -475,7 +475,7 @@ const ModelDropdown = ({
> >
<div className="flex items-center gap-2"> <div className="flex items-center gap-2">
<p <p
className="line-clamp-1 text-[hsla(var(--text-secondary))]" className="max-w-[200px] overflow-hidden truncate whitespace-nowrap text-[hsla(var(--text-secondary))]"
title={model.name} title={model.name}
> >
{model.name} {model.name}
@ -549,6 +549,8 @@ const ModelDropdown = ({
(c) => c.id === model.id (c) => c.id === model.id
) )
return ( return (
<>
{isDownloaded && (
<li <li
key={model.id} key={model.id}
className={twMerge( className={twMerge(
@ -558,7 +560,10 @@ const ModelDropdown = ({
: 'text-[hsla(var(--text-primary))]' : 'text-[hsla(var(--text-primary))]'
)} )}
onClick={() => { onClick={() => {
if (!isConfigured && engine.type === 'remote') if (
!isConfigured &&
engine.type === 'remote'
)
return null return null
if (isDownloaded) { if (isDownloaded) {
onClickModelItem(model.id) onClickModelItem(model.id)
@ -568,7 +573,7 @@ const ModelDropdown = ({
<div className="flex gap-x-2"> <div className="flex gap-x-2">
<p <p
className={twMerge( className={twMerge(
'line-clamp-1', 'max-w-[200px] overflow-hidden truncate whitespace-nowrap',
!isDownloaded && !isDownloaded &&
'text-[hsla(var(--text-secondary))]' 'text-[hsla(var(--text-secondary))]'
)} )}
@ -618,6 +623,8 @@ const ModelDropdown = ({
)} )}
</div> </div>
</li> </li>
)}
</>
) )
})} })}
</ul> </ul>
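
The hunk at -248,+252 above replaces defaultContextLength with contextLength, which is only defined when the model actually declares a ctx_len setting. A minimal standalone sketch of that clamping behaviour, using simplified stand-in types rather than the app's real model interfaces:

// Sketch only: ModelSettings/ModelParameters are simplified stand-ins,
// not the real types from the Jan codebase.
interface ModelSettings {
  ctx_len?: number
}
interface ModelParameters {
  max_tokens?: number
}

function overrideParameters(
  settings: ModelSettings,
  parameters: ModelParameters
) {
  // Clamp the context window to 8192 only when the model declares one;
  // otherwise leave it undefined so the engine default applies.
  const contextLength = settings.ctx_len
    ? Math.min(8192, settings.ctx_len)
    : undefined

  return {
    ctx_len: contextLength,
    // max_tokens must never exceed the effective context length.
    max_tokens: contextLength
      ? Math.min(parameters.max_tokens ?? 8192, contextLength)
      : parameters.max_tokens,
  }
}

// overrideParameters({ ctx_len: 32768 }, { max_tokens: 16384 })
// -> { ctx_len: 8192, max_tokens: 8192 }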

View File

@@ -29,15 +29,20 @@ const ModelLabel = ({ size, compact }: Props) => {
const { settings } = useSettings()
const getLabel = (size: number) => {
-const minimumRamModel = size * 1.25
-const availableRam =
-  settings?.run_mode === 'gpu'
-    ? availableVram * 1000000 // MB to bytes
-    : totalRam - usedRam + (activeModel?.metadata?.size ?? 0)
+const minimumRamModel = (size * 1.25) / (1024 * 1024)
+const availableRam = settings?.gpus?.some((gpu) => gpu.activated)
+  ? availableVram * 1000000 // MB to bytes
+  : totalRam -
+    (usedRam +
+      (activeModel?.metadata?.size
+        ? (activeModel.metadata.size * 1.25) / (1024 * 1024)
+        : 0))
if (minimumRamModel > totalRam) {
return (
<NotEnoughMemoryLabel
-unit={settings?.run_mode === 'gpu' ? 'VRAM' : 'RAM'}
+unit={settings?.gpus?.some((gpu) => gpu.activated) ? 'VRAM' : 'RAM'}
compact={compact}
/>
)
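
To make the unit handling in the new minimumRamModel line concrete: the division by 1024 * 1024 implies that size arrives as a byte count, and the 1.25 factor is the loading-overhead margin this component already uses. A tiny sketch (the helper name is hypothetical, not part of the codebase):

// Hypothetical helper mirroring the formula from the hunk above.
const toMinimumRamMiB = (sizeBytes: number): number =>
  (sizeBytes * 1.25) / (1024 * 1024)

// A 4 GiB model file needs roughly 5120 MiB once the 1.25x margin is applied:
console.log(toMinimumRamMiB(4 * 1024 * 1024 * 1024)) // 5120

Expressing the requirement at a MiB-like scale presumably matches the scale of the totalRam and usedRam counters it is compared against; the GPU branch is unchanged apart from the gpus.some((gpu) => gpu.activated) check replacing run_mode === 'gpu'.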

View File

@@ -143,8 +143,7 @@ export default function ModelHandler() {
return
}
-// The thread title should not be updated if the message is less than 10 words
-// And no new line character is present
+// No new line character is presented in the title
// And non-alphanumeric characters should be removed
if (messageContent.includes('\n')) {
messageContent = messageContent.replace(/\n/g, ' ')
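
The updated comments describe the thread-title clean-up in two steps: flatten newlines to spaces, then drop non-alphanumeric characters. A minimal sketch of that behaviour as a standalone helper (the name sanitizeTitle is illustrative; the real logic stays inline in ModelHandler and may differ in detail):

// Illustrative helper; not the actual implementation.
function sanitizeTitle(messageContent: string): string {
  // Flatten new line characters into spaces, as in the hunk above.
  let title = messageContent.replace(/\n/g, ' ')
  // Drop characters that are neither alphanumeric nor whitespace,
  // per the "non-alphanumeric characters should be removed" comment.
  title = title.replace(/[^a-zA-Z0-9\s]/g, '')
  return title.trim()
}

// sanitizeTitle('Hello,\nworld!') -> 'Hello world'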

View File

@@ -93,14 +93,8 @@ const ServerLogs = (props: ServerLogsProps) => {
}, [listRef.current?.scrollHeight, isUserManuallyScrollingUp, logs])
return (
-<ScrollArea
-  ref={listRef}
-  className={twMerge(
-    'h-[calc(100%-49px)] w-full p-4 py-0',
-    logs.length === 0 && 'mx-auto'
-  )}
-  onScroll={handleScroll}
->
+<>
+<div>
{withCopy && (
<div className="absolute right-2 top-7">
<div className="flex w-full flex-row gap-2">

@@ -140,6 +134,15 @@ const ServerLogs = (props: ServerLogsProps) => {
</div>
</div>
)}
+</div>
+<ScrollArea
+  ref={listRef}
+  className={twMerge(
+    'h-[calc(100%-49px)] w-full p-4 py-0',
+    logs.length === 0 && 'mx-auto'
+  )}
+  onScroll={handleScroll}
+>
<div className="flex h-full w-full flex-col">
{logs.length > 0 ? (
<code className="inline-block max-w-[38vw] whitespace-break-spaces text-[13px] lg:max-w-[40vw] xl:max-w-[50vw]">

@@ -155,7 +158,7 @@ const ServerLogs = (props: ServerLogsProps) => {
<div
className={twMerge(
'mt-24 flex w-full flex-col items-center justify-center',
-withCopy && 'mt-0 py-2'
+withCopy && 'mt-4 py-2'
)}
>
<svg

@@ -287,11 +290,14 @@ const ServerLogs = (props: ServerLogsProps) => {
</linearGradient>
</defs>
</svg>
-<p className="text-[hsla(var(--text-secondary)] mt-4">Empty logs</p>
+<p className="text-[hsla(var(--text-secondary)] mt-4">
+  Empty logs
+</p>
</div>
)}
</div>
</ScrollArea>
+</>
)
}

View File

@@ -73,7 +73,7 @@ const SliderRightPanel = ({
trigger={
<Input
type="text"
-className="-mt-4 h-8 w-[60px]"
+className="-mt-4 h-8 w-[68px]"
min={min}
max={max}
value={val}

View File

@@ -8,6 +8,8 @@ export const mainViewStateAtom = atom<MainViewState>(MainViewState.Thread)
export const defaultJanDataFolderAtom = atom<string>('')
+export const LocalEngineDefaultVariantAtom = atom<string>('')
const SHOW_RIGHT_PANEL = 'showRightPanel'
// Store panel atom
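
For context on the one-line addition above: atom<string>('') creates a writable Jotai atom, which components read and update through the standard Jotai hooks. A minimal usage sketch (the component, option values, and import path are illustrative only):

import { useAtom } from 'jotai'

// Import path assumed for illustration; use the actual atoms module.
import { LocalEngineDefaultVariantAtom } from '@/helpers/atoms/App.atom'

const VariantPicker = () => {
  const [variant, setVariant] = useAtom(LocalEngineDefaultVariantAtom)

  return (
    <select value={variant} onChange={(e) => setVariant(e.target.value)}>
      <option value="">Auto</option>
      <option value="cuda">cuda</option>
      <option value="cpu">cpu</option>
    </select>
  )
}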

Some files were not shown because too many files have changed in this diff.