Merge pull request #4683 from janhq/chore/sync-release-to-dev
chore: sync release v0.5.15 branch into dev branch
commit c4d7a143eb
.github/workflows/jan-electron-build-beta.yml

@@ -9,31 +9,6 @@ jobs:
  get-update-version:
    uses: ./.github/workflows/template-get-update-version.yml

-  create-draft-release:
-    runs-on: ubuntu-latest
-    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
-    outputs:
-      upload_url: ${{ steps.create_release.outputs.upload_url }}
-      version: ${{ steps.get_version.outputs.version }}
-    permissions:
-      contents: write
-    steps:
-      - name: Extract tag name without v prefix
-        id: get_version
-        run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV && echo "::set-output name=version::${GITHUB_REF#refs/tags/v}"
-        env:
-          GITHUB_REF: ${{ github.ref }}
-      - name: Create Draft Release
-        id: create_release
-        uses: softprops/action-gh-release@v2
-        with:
-          tag_name: ${{ github.ref_name }}
-          token: ${{ secrets.GITHUB_TOKEN }}
-          name: "${{ env.VERSION }}"
-          draft: true
-          prerelease: false
-          generate_release_notes: true
-
  build-macos:
    uses: ./.github/workflows/template-build-macos.yml
    secrets: inherit

@@ -65,7 +40,7 @@ jobs:
      beta: true

  sync-temp-to-latest:
-    needs: [build-macos, create-draft-release, build-windows-x64, build-linux-x64]
+    needs: [build-macos, build-windows-x64, build-linux-x64]
    runs-on: ubuntu-latest
    permissions:
      contents: write

@@ -82,19 +57,15 @@ jobs:
          AWS_DEFAULT_REGION: ${{ secrets.DELTA_AWS_REGION }}
          AWS_EC2_METADATA_DISABLED: "true"

-      - name: set release to prerelease
-        run: |
-          gh release edit v${{ needs.create-draft-release.outputs.version }} --draft=false --prerelease
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

  noti-discord-and-update-url-readme:
-    needs: [build-macos, create-draft-release, build-windows-x64, build-linux-x64, sync-temp-to-latest]
+    needs: [build-macos, get-update-version, build-windows-x64, build-linux-x64, sync-temp-to-latest]
    runs-on: ubuntu-latest
    steps:
      - name: Set version to environment variable
        run: |
-          echo "VERSION=${{ needs.create-draft-release.outputs.version }}" >> $GITHUB_ENV
+          VERSION=${{ needs.get-update-version.outputs.new_version }}
+          VERSION="${VERSION#v}"
+          echo "VERSION=$VERSION" >> $GITHUB_ENV

      - name: Notify Discord
        uses: Ilshidur/action-discord@master

@@ -105,6 +76,5 @@ jobs:
          - macOS Universal: https://delta.jan.ai/beta/jan-beta-mac-universal-{{ VERSION }}.dmg
          - Linux Deb: https://delta.jan.ai/beta/jan-beta-linux-amd64-{{ VERSION }}.deb
          - Linux AppImage: https://delta.jan.ai/beta/jan-beta-linux-x86_64-{{ VERSION }}.AppImage
-          - Github Release URL: https://github.com/janhq/jan/releases/tag/v{{ VERSION }}
        env:
          DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_JAN_BETA }}
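With create-draft-release gone, the beta version now flows from the get-update-version template job, and the leading v is stripped in shell with VERSION="${VERSION#v}". A minimal TypeScript sketch of that same normalization (the sample tag value is an assumption for illustration):

// Mirrors VERSION="${VERSION#v}" from the workflow step above.
const newVersion = 'v0.5.15' // e.g. needs.get-update-version.outputs.new_version
const version = newVersion.replace(/^v/, '') // -> '0.5.15'
console.log(version)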
.github/workflows/template-build-jan-server.yml (deleted)

@@ -1,39 +0,0 @@
-name: build-jan-server
-on:
-  workflow_call:
-    inputs:
-      dockerfile_path:
-        required: false
-        type: string
-        default: './Dockerfile'
-      docker_image_tag:
-        required: true
-        type: string
-        default: 'ghcr.io/janhq/jan-server:dev-latest'
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    env:
-      REGISTRY: ghcr.io
-      IMAGE_NAME: janhq/jan-server
-    permissions:
-      packages: write
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Log in to the Container registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Build and push Docker image
-        uses: docker/build-push-action@v3
-        with:
-          context: .
-          file: ${{ inputs.dockerfile_path }}
-          push: true
-          tags: ${{ inputs.docker_image_tag }}
@@ -83,7 +83,7 @@ jobs:
      cat ./electron/package.json
      echo "------------------------"
      cat ./package.json
-      jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
+      jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
      mv /tmp/package.json electron/package.json
      cat electron/package.json
.github/workflows/template-build-macos.yml

@@ -99,7 +99,7 @@ jobs:
      cat ./electron/package.json
      echo "------------------------"
      cat ./package.json
-      jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
+      jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
      mv /tmp/package.json electron/package.json
      cat electron/package.json

@@ -108,7 +108,7 @@ jobs:
      cat ./package.json
      echo "------------------------"
      cat ./electron/scripts/uninstaller.nsh
-      jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
+      jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
      mv /tmp/package.json electron/package.json
      cat electron/package.json
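Each jq edit above drops the github provider from electron-builder's build.publish array, so beta artifacts publish only to the generic delta.jan.ai endpoint and the S3 temp-beta path. A sketch of the resulting config as a TypeScript literal (the bucket and region placeholders stand in for repository secrets injected at build time):

// Assumed shape of electron/package.json's build.publish after the jq edit above.
const publish = [
  { provider: 'generic', url: 'https://delta.jan.ai/beta', channel: 'beta' },
  {
    provider: 's3',
    acl: null,
    bucket: '<DELTA_AWS_S3_BUCKET_NAME>', // placeholder for the repository secret
    region: '<DELTA_AWS_REGION>',         // placeholder for the repository secret
    path: 'temp-beta',
    channel: 'beta',
  },
]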
@@ -12,6 +12,7 @@ export enum ExtensionTypeEnum {
  SystemMonitoring = 'systemMonitoring',
  HuggingFace = 'huggingFace',
  Engine = 'engine',
+  Hardware = 'hardware',
}

export interface ExtensionType {
@@ -38,8 +38,14 @@ describe('OAIEngine', () => {

  it('should subscribe to events on load', () => {
    engine.onLoad()
-    expect(events.on).toHaveBeenCalledWith(MessageEvent.OnMessageSent, expect.any(Function))
-    expect(events.on).toHaveBeenCalledWith(InferenceEvent.OnInferenceStopped, expect.any(Function))
+    expect(events.on).toHaveBeenCalledWith(
+      MessageEvent.OnMessageSent,
+      expect.any(Function)
+    )
+    expect(events.on).toHaveBeenCalledWith(
+      InferenceEvent.OnInferenceStopped,
+      expect.any(Function)
+    )
  })

  it('should handle inference request', async () => {

@@ -77,7 +83,12 @@ describe('OAIEngine', () => {
    expect(events.emit).toHaveBeenCalledWith(
      MessageEvent.OnMessageUpdate,
      expect.objectContaining({
-        content: [{ type: ContentType.Text, text: { value: 'test response', annotations: [] } }],
+        content: [
+          {
+            type: ContentType.Text,
+            text: { value: 'test response', annotations: [] },
+          },
+        ],
        status: MessageStatus.Ready,
      })
    )

@@ -101,11 +112,10 @@ describe('OAIEngine', () => {

    await engine.inference(data)

-    expect(events.emit).toHaveBeenCalledWith(
+    expect(events.emit).toHaveBeenLastCalledWith(
      MessageEvent.OnMessageUpdate,
      expect.objectContaining({
        content: [{ type: ContentType.Text, text: { value: 'test error', annotations: [] } }],
-        status: MessageStatus.Error,
+        status: 'error',
        error_code: 500,
      })
    )
@@ -42,7 +42,9 @@ export abstract class OAIEngine extends AIEngine {
   */
  override onLoad() {
    super.onLoad()
-    events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => this.inference(data))
+    events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
+      this.inference(data)
+    )
    events.on(InferenceEvent.OnInferenceStopped, () => this.stopInference())
  }

@@ -128,7 +130,9 @@ export abstract class OAIEngine extends AIEngine {
        events.emit(MessageEvent.OnMessageUpdate, message)
      },
      complete: async () => {
-        message.status = message.content.length ? MessageStatus.Ready : MessageStatus.Error
+        message.status = message.content.length
+          ? MessageStatus.Ready
+          : MessageStatus.Error
        events.emit(MessageEvent.OnMessageUpdate, message)
      },
      error: async (err: any) => {

@@ -141,7 +145,10 @@ export abstract class OAIEngine extends AIEngine {
        message.content[0] = {
          type: ContentType.Text,
          text: {
-            value: err.message,
+            value:
+              typeof message === 'string'
+                ? err.message
+                : (JSON.stringify(err.message) ?? err.detail),
            annotations: [],
          },
        }
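The error branch above stops interpolating err.message directly: a non-string payload is serialized, with err.detail as a fallback. A standalone sketch of that split (the sample error object is invented; note the committed code tests typeof message, the enclosing message variable, rather than err.message):

// Sketch, assuming the intent is to distinguish string from structured payloads.
const err: any = { message: { error: 'quota exceeded' }, detail: 'fallback detail' }
const value =
  typeof err.message === 'string'
    ? err.message
    : (JSON.stringify(err.message) ?? err.detail)
console.log(value) // '{"error":"quota exceeded"}'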
@@ -1,14 +1,17 @@
import { lastValueFrom, Observable } from 'rxjs'
import { requestInference } from './sse'

-import { ReadableStream } from 'stream/web';
+import { ReadableStream } from 'stream/web'
describe('requestInference', () => {
  it('should send a request to the inference server and return an Observable', () => {
    // Mock the fetch function
    const mockFetch: any = jest.fn(() =>
      Promise.resolve({
        ok: true,
-        json: () => Promise.resolve({ choices: [{ message: { content: 'Generated response' } }] }),
+        json: () =>
+          Promise.resolve({
+            choices: [{ message: { content: 'Generated response' } }],
+          }),
        headers: new Headers(),
        redirected: false,
        status: 200,

@@ -36,7 +39,10 @@ describe('requestInference', () => {
    const mockFetch: any = jest.fn(() =>
      Promise.resolve({
        ok: false,
-        json: () => Promise.resolve({ error: { message: 'Wrong API Key', code: 'invalid_api_key' } }),
+        json: () =>
+          Promise.resolve({
+            error: { message: 'Invalid API Key.', code: 'invalid_api_key' },
+          }),
        headers: new Headers(),
        redirected: false,
        status: 401,

@@ -56,7 +62,10 @@ describe('requestInference', () => {

    // Assert the expected behavior
    expect(result).toBeInstanceOf(Observable)
-    expect(lastValueFrom(result)).rejects.toEqual({ message: 'Wrong API Key', code: 'invalid_api_key' })
+    expect(lastValueFrom(result)).rejects.toEqual({
+      message: 'Invalid API Key.',
+      code: 'invalid_api_key',
+    })
  })
})

@@ -65,7 +74,10 @@ describe('requestInference', () => {
    const mockFetch: any = jest.fn(() =>
      Promise.resolve({
        ok: true,
-        json: () => Promise.resolve({ choices: [{ message: { content: 'Generated response' } }] }),
+        json: () =>
+          Promise.resolve({
+            choices: [{ message: { content: 'Generated response' } }],
+          }),
        headers: new Headers(),
        redirected: false,
        status: 200,

@@ -78,17 +90,24 @@ describe('requestInference', () => {
    const inferenceUrl = 'https://inference-server.com'
    const requestBody = { message: 'Hello' }
    const model = { id: 'model-id', parameters: { stream: false } }
-    const transformResponse = (data: any) => data.choices[0].message.content.toUpperCase()
+    const transformResponse = (data: any) =>
+      data.choices[0].message.content.toUpperCase()

    // Call the function
-    const result = requestInference(inferenceUrl, requestBody, model, undefined, undefined, transformResponse)
+    const result = requestInference(
+      inferenceUrl,
+      requestBody,
+      model,
+      undefined,
+      undefined,
+      transformResponse
+    )

    // Assert the expected behavior
    expect(result).toBeInstanceOf(Observable)
    expect(lastValueFrom(result)).resolves.toEqual('GENERATED RESPONSE')
  })

  it('should handle a successful response with streaming enabled', () => {
    // Mock the fetch function
    const mockFetch: any = jest.fn(() =>

@@ -96,29 +115,32 @@ describe('requestInference', () => {
        ok: true,
        body: new ReadableStream({
          start(controller) {
-            controller.enqueue(new TextEncoder().encode('data: {"choices": [{"delta": {"content": "Streamed"}}]}'));
-            controller.enqueue(new TextEncoder().encode('data: [DONE]'));
-            controller.close();
-          }
+            controller.enqueue(
+              new TextEncoder().encode(
+                'data: {"choices": [{"delta": {"content": "Streamed"}}]}'
+              )
+            )
+            controller.enqueue(new TextEncoder().encode('data: [DONE]'))
+            controller.close()
+          },
        }),
        headers: new Headers(),
        redirected: false,
        status: 200,
        statusText: 'OK',
      })
-    );
-    jest.spyOn(global, 'fetch').mockImplementation(mockFetch);
+    )
+    jest.spyOn(global, 'fetch').mockImplementation(mockFetch)

    // Define the test inputs
-    const inferenceUrl = 'https://inference-server.com';
-    const requestBody = { message: 'Hello' };
-    const model = { id: 'model-id', parameters: { stream: true } };
+    const inferenceUrl = 'https://inference-server.com'
+    const requestBody = { message: 'Hello' }
+    const model = { id: 'model-id', parameters: { stream: true } }

    // Call the function
-    const result = requestInference(inferenceUrl, requestBody, model);
+    const result = requestInference(inferenceUrl, requestBody, model)

    // Assert the expected behavior
-    expect(result).toBeInstanceOf(Observable);
-    expect(lastValueFrom(result)).resolves.toEqual('Streamed');
-  });
+    expect(result).toBeInstanceOf(Observable)
+    expect(lastValueFrom(result)).resolves.toEqual('Streamed')
+  })
@@ -32,21 +32,20 @@ export function requestInference(
    })
      .then(async (response) => {
        if (!response.ok) {
-          const data = await response.json()
-          let errorCode = ErrorCode.Unknown
-          if (data.error) {
-            errorCode = data.error.code ?? data.error.type ?? ErrorCode.Unknown
-          } else if (response.status === 401) {
-            errorCode = ErrorCode.InvalidApiKey
+          if (response.status === 401) {
+            throw {
+              code: ErrorCode.InvalidApiKey,
+              message: 'Invalid API Key.',
+            }
          }
-          const error = {
-            message: data.error?.message ?? data.message ?? 'Error occurred.',
-            code: errorCode,
-          }
-          subscriber.error(error)
-          subscriber.complete()
+          let data = await response.json()
+          try {
+            handleError(data)
+          } catch (err) {
+            subscriber.error(err)
+            return
+          }
        }
        // There could be overriden stream parameter in the model
        // that is set in request body (transformed payload)
        if (

@@ -54,9 +53,10 @@ export function requestInference(
          model.parameters?.stream === false
        ) {
          const data = await response.json()
-          if (data.error || data.message) {
-            subscriber.error(data.error ?? data)
-            subscriber.complete()
+          try {
+            handleError(data)
+          } catch (err) {
+            subscriber.error(err)
+            return
          }
          if (transformResponse) {

@@ -91,13 +91,10 @@ export function requestInference(
              const toParse = cachedLines + line
              if (!line.includes('data: [DONE]')) {
                const data = JSON.parse(toParse.replace('data: ', ''))
-                if (
-                  'error' in data ||
-                  'message' in data ||
-                  'detail' in data
-                ) {
-                  subscriber.error(data.error ?? data)
-                  subscriber.complete()
+                try {
+                  handleError(data)
+                } catch (err) {
+                  subscriber.error(err)
+                  return
                }
                content += data.choices[0]?.delta?.content ?? ''

@@ -118,3 +115,18 @@ export function requestInference(
      .catch((err) => subscriber.error(err))
  })
}

+/**
+ * Handle error and normalize it to a common format.
+ * @param data
+ */
+const handleError = (data: any) => {
+  if (
+    data.error ||
+    data.message ||
+    data.detail ||
+    (Array.isArray(data) && data.length && data[0].error)
+  ) {
+    throw data.error ?? data[0]?.error ?? data
+  }
+}
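handleError replaces the three ad-hoc error checks removed in the hunks above with one normalizer used on non-OK responses, non-stream bodies, and streamed chunks alike. A usage sketch, with sample payloads that are assumptions for illustration:

try {
  handleError({ error: { message: 'Invalid API Key.', code: 'invalid_api_key' } })
} catch (err) {
  // err is { message: 'Invalid API Key.', code: 'invalid_api_key' }
}
try {
  handleError([{ error: { message: 'batch item failed' } }]) // array form
} catch (err) {
  // err is { message: 'batch item failed' }
}
handleError({ choices: [] }) // no error/message/detail key: returns without throwing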
core/src/browser/extensions/hardwareManagement.ts (new file)

@@ -0,0 +1,26 @@
+import { HardwareInformation } from '../../types'
+import { BaseExtension, ExtensionTypeEnum } from '../extension'
+
+/**
+ * Engine management extension. Persists and retrieves engine management.
+ * @abstract
+ * @extends BaseExtension
+ */
+export abstract class HardwareManagementExtension extends BaseExtension {
+  type(): ExtensionTypeEnum | undefined {
+    return ExtensionTypeEnum.Hardware
+  }
+
+  /**
+   * @returns A Promise that resolves to an object of list hardware.
+   */
+  abstract getHardware(): Promise<HardwareInformation>
+
+  /**
+   * @returns A Promise that resolves to an object of set active gpus.
+   */
+  abstract setAvtiveGpu(data: { gpus: number[] }): Promise<{
+    message: string
+    activated_gpus: number[]
+  }>
+}
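A hypothetical concrete subclass, only to illustrate the new abstract API (the class name and return values are invented; the lifecycle no-ops assume BaseExtension's usual onLoad/onUnload hooks, and setAvtiveGpu is spelled exactly as declared above):

class ExampleHardwareExtension extends HardwareManagementExtension {
  onLoad(): void {} // assumed BaseExtension lifecycle hook
  onUnload(): void {} // assumed BaseExtension lifecycle hook
  async getHardware(): Promise<HardwareInformation> {
    throw new Error('Method not implemented.') // real extensions query the hardware backend
  }
  async setAvtiveGpu(data: { gpus: number[] }) {
    return { message: 'activated', activated_gpus: data.gpus } // invented response
  }
}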
@@ -1,6 +1,5 @@
import { ConversationalExtension } from './index';
import { InferenceExtension } from './index';
-import { MonitoringExtension } from './index';
import { AssistantExtension } from './index';
import { ModelExtension } from './index';
import * as Engines from './index';

@@ -14,10 +13,6 @@ describe('index.ts exports', () => {
    expect(InferenceExtension).toBeDefined();
  });

-  test('should export MonitoringExtension', () => {
-    expect(MonitoringExtension).toBeDefined();
-  });
-
  test('should export AssistantExtension', () => {
    expect(AssistantExtension).toBeDefined();
  });
@@ -9,10 +9,7 @@ export { ConversationalExtension } from './conversational'
 */
export { InferenceExtension } from './inference'

-/**
- * Monitoring extension for system monitoring.
- */
-export { MonitoringExtension } from './monitoring'

/**
 * Assistant extension for managing assistants.

@@ -33,3 +30,8 @@ export * from './engines'
 * Engines Management
 */
export * from './enginesManagement'
+
+/**
+ * Hardware Management
+ */
+export * from './hardwareManagement'
@@ -1,42 +0,0 @@
-
-import { ExtensionTypeEnum } from '../extension';
-import { MonitoringExtension } from './monitoring';
-
-it('should have the correct type', () => {
-  class TestMonitoringExtension extends MonitoringExtension {
-    getGpuSetting(): Promise<GpuSetting | undefined> {
-      throw new Error('Method not implemented.');
-    }
-    getResourcesInfo(): Promise<any> {
-      throw new Error('Method not implemented.');
-    }
-    getCurrentLoad(): Promise<any> {
-      throw new Error('Method not implemented.');
-    }
-    getOsInfo(): Promise<OperatingSystemInfo> {
-      throw new Error('Method not implemented.');
-    }
-  }
-  const monitoringExtension = new TestMonitoringExtension();
-  expect(monitoringExtension.type()).toBe(ExtensionTypeEnum.SystemMonitoring);
-});
-
-
-it('should create an instance of MonitoringExtension', () => {
-  class TestMonitoringExtension extends MonitoringExtension {
-    getGpuSetting(): Promise<GpuSetting | undefined> {
-      throw new Error('Method not implemented.');
-    }
-    getResourcesInfo(): Promise<any> {
-      throw new Error('Method not implemented.');
-    }
-    getCurrentLoad(): Promise<any> {
-      throw new Error('Method not implemented.');
-    }
-    getOsInfo(): Promise<OperatingSystemInfo> {
-      throw new Error('Method not implemented.');
-    }
-  }
-  const monitoringExtension = new TestMonitoringExtension();
-  expect(monitoringExtension).toBeInstanceOf(MonitoringExtension);
-});
@@ -1,20 +0,0 @@
-import { BaseExtension, ExtensionTypeEnum } from '../extension'
-import { GpuSetting, MonitoringInterface, OperatingSystemInfo } from '../../types'
-
-/**
- * Monitoring extension for system monitoring.
- * @extends BaseExtension
- */
-export abstract class MonitoringExtension extends BaseExtension implements MonitoringInterface {
-  /**
-   * Monitoring extension type.
-   */
-  type(): ExtensionTypeEnum | undefined {
-    return ExtensionTypeEnum.SystemMonitoring
-  }
-
-  abstract getGpuSetting(): Promise<GpuSetting | undefined>
-  abstract getResourcesInfo(): Promise<any>
-  abstract getCurrentLoad(): Promise<any>
-  abstract getOsInfo(): Promise<OperatingSystemInfo>
-}
@@ -1,4 +1,5 @@
export type AppConfiguration = {
  data_folder: string
  quick_ask: boolean
+  distinct_id?: string
}
@@ -18,6 +18,7 @@ export type EngineMetadata = {
      template?: string
    }
  }
+  explore_models_url?: string
}

export type EngineVariant = {
core/src/types/hardware/index.ts (new file)

@@ -0,0 +1,55 @@
+export type Cpu = {
+  arch: string
+  cores: number
+  instructions: string[]
+  model: string
+  usage: number
+}
+
+export type GpuAdditionalInformation = {
+  compute_cap: string
+  driver_version: string
+}
+
+export type Gpu = {
+  activated: boolean
+  additional_information?: GpuAdditionalInformation
+  free_vram: number
+  id: string
+  name: string
+  total_vram: number
+  uuid: string
+  version: string
+}
+
+export type Os = {
+  name: string
+  version: string
+}
+
+export type Power = {
+  battery_life: number
+  charging_status: string
+  is_power_saving: boolean
+}
+
+export type Ram = {
+  available: number
+  total: number
+  type: string
+}
+
+export type Storage = {
+  available: number
+  total: number
+  type: string
+}
+
+export type HardwareInformation = {
+  cpu: Cpu
+  gpus: Gpu[]
+  os: Os
+  power: Power
+  ram: Ram
+  storage: Storage
+}
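An illustrative value conforming to the new HardwareInformation type (all numbers and names are made-up sample data; the types above do not specify units for VRAM, RAM, or storage):

const sample: HardwareInformation = {
  cpu: { arch: 'x86_64', cores: 16, instructions: ['avx2'], model: 'Example CPU', usage: 12.5 },
  gpus: [
    {
      activated: true,
      additional_information: { compute_cap: '8.9', driver_version: '560.35' },
      free_vram: 10240,
      id: '0',
      name: 'Example GPU',
      total_vram: 12288,
      uuid: 'GPU-00000000',
      version: '12.4',
    },
  ],
  os: { name: 'Ubuntu', version: '22.04' },
  power: { battery_life: 100, charging_status: 'charging', is_power_saving: false },
  ram: { available: 20480, total: 32768, type: 'DDR5' },
  storage: { available: 512000, total: 1024000, type: 'SSD' },
}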
@@ -4,7 +4,6 @@ import * as model from './model';
import * as thread from './thread';
import * as message from './message';
import * as inference from './inference';
-import * as monitoring from './monitoring';
import * as file from './file';
import * as config from './config';
import * as huggingface from './huggingface';

@@ -18,7 +17,6 @@ import * as setting from './setting';
  expect(thread).toBeDefined();
  expect(message).toBeDefined();
  expect(inference).toBeDefined();
-  expect(monitoring).toBeDefined();
  expect(file).toBeDefined();
  expect(config).toBeDefined();
  expect(huggingface).toBeDefined();
@@ -3,7 +3,6 @@ export * from './model'
export * from './thread'
export * from './message'
export * from './inference'
-export * from './monitoring'
export * from './file'
export * from './config'
export * from './huggingface'

@@ -11,3 +10,4 @@ export * from './miscellaneous'
export * from './api'
export * from './setting'
export * from './engine'
+export * from './hardware'
@@ -1,33 +1,25 @@
+import { GpuAdditionalInformation } from '../hardware'

export type SystemResourceInfo = {
  memAvailable: number
}

export type RunMode = 'cpu' | 'gpu'

export type GpuSetting = {
  notify: boolean
  run_mode: RunMode
  nvidia_driver: {
    exist: boolean
    version: string
  }
  cuda: {
    exist: boolean
    version: string
  }
  gpus: GpuSettingInfo[]
  gpu_highest_vram: string
  gpus_in_use: string[]
  is_initial: boolean
  // TODO: This needs to be set based on user toggle in settings
  vulkan: boolean
  cpu?: any
}

export type GpuSettingInfo = {
  activated: boolean
  free_vram: number
  id: string
  vram: string
  name: string
  arch?: string
  total_vram: number
  uuid: string
  version: string
  additional_information?: GpuAdditionalInformation
}

export type SystemInformation = {

@@ -42,9 +34,6 @@ export type SupportedPlatform = SupportedPlatformTuple[number]
export type OperatingSystemInfo = {
  platform: SupportedPlatform | 'unknown'
  arch: string
  release: string
  machine: string
  version: string
  totalMem: number
  freeMem: number
}
@@ -71,7 +71,7 @@ export type Model = {
  /**
   * The model identifier, modern version of id.
   */
-  mode?: string
+  model?: string

  /**
   * Human-readable name that is used for UI.

@@ -150,6 +150,7 @@ export type ModelSettingParams = {
 */
export type ModelRuntimeParams = {
  temperature?: number
+  max_temperature?: number
  token_limit?: number
  top_k?: number
  top_p?: number
@@ -1,13 +0,0 @@
-import * as monitoringInterface from './monitoringInterface'
-import * as resourceInfo from './resourceInfo'
-
-import * as index from './index'
-
-it('should re-export all symbols from monitoringInterface and resourceInfo', () => {
-  for (const key in monitoringInterface) {
-    expect(index[key]).toBe(monitoringInterface[key])
-  }
-  for (const key in resourceInfo) {
-    expect(index[key]).toBe(resourceInfo[key])
-  }
-})

@@ -1,2 +0,0 @@
-export * from './monitoringInterface'
-export * from './resourceInfo'

@@ -1,29 +0,0 @@
-import { GpuSetting, OperatingSystemInfo } from '../miscellaneous'
-
-/**
- * Monitoring extension for system monitoring.
- * @extends BaseExtension
- */
-export interface MonitoringInterface {
-  /**
-   * Returns information about the system resources.
-   * @returns {Promise<any>} A promise that resolves with the system resources information.
-   */
-  getResourcesInfo(): Promise<any>
-
-  /**
-   * Returns the current system load.
-   * @returns {Promise<any>} A promise that resolves with the current system load.
-   */
-  getCurrentLoad(): Promise<any>
-
-  /**
-   * Returns the GPU configuration.
-   */
-  getGpuSetting(): Promise<GpuSetting | undefined>
-
-  /**
-   * Returns information about the operating system.
-   */
-  getOsInfo(): Promise<OperatingSystemInfo>
-}

@@ -1,6 +0,0 @@
-export type ResourceInfo = {
-  mem: {
-    totalMemory: number
-    usedMemory: number
-  }
-}
@@ -55,7 +55,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "Conversational",
    "version": "1.0.0",
    "main": "dist/index.js",
-    "description": "This extension enables conversations and state persistence via your filesystem",
+    "description": "This extension enables conversations and state persistence via your filesystem.",
    "url": "extension://@janhq/conversational-extension/dist/index.js"
  },
  "@janhq/inference-anthropic-extension": {

@@ -70,7 +70,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "Anthropic Inference Engine",
    "version": "1.0.2",
    "main": "dist/index.js",
-    "description": "This extension enables Anthropic chat completion API calls",
+    "description": "This extension enables Anthropic chat completion API calls.",
    "url": "extension://@janhq/inference-anthropic-extension/dist/index.js"
  },
  "@janhq/inference-triton-trt-llm-extension": {

@@ -85,7 +85,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "Triton-TRT-LLM Inference Engine",
    "version": "1.0.0",
    "main": "dist/index.js",
-    "description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option",
+    "description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option.",
    "url": "extension://@janhq/inference-triton-trt-llm-extension/dist/index.js"
  },
  "@janhq/inference-mistral-extension": {

@@ -100,7 +100,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "MistralAI Inference Engine",
    "version": "1.0.1",
    "main": "dist/index.js",
-    "description": "This extension enables Mistral chat completion API calls",
+    "description": "This extension enables Mistral chat completion API calls.",
    "url": "extension://@janhq/inference-mistral-extension/dist/index.js"
  },
  "@janhq/inference-martian-extension": {

@@ -115,7 +115,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "Martian Inference Engine",
    "version": "1.0.1",
    "main": "dist/index.js",
-    "description": "This extension enables Martian chat completion API calls",
+    "description": "This extension enables Martian chat completion API calls.",
    "url": "extension://@janhq/inference-martian-extension/dist/index.js"
  },
  "@janhq/inference-openrouter-extension": {

@@ -130,7 +130,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "OpenRouter Inference Engine",
    "version": "1.0.0",
    "main": "dist/index.js",
-    "description": "This extension enables Open Router chat completion API calls",
+    "description": "This extension enables Open Router chat completion API calls.",
    "url": "extension://@janhq/inference-openrouter-extension/dist/index.js"
  },
  "@janhq/inference-nvidia-extension": {

@@ -145,7 +145,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "NVIDIA NIM Inference Engine",
    "version": "1.0.1",
    "main": "dist/index.js",
-    "description": "This extension enables NVIDIA chat completion API calls",
+    "description": "This extension enables NVIDIA chat completion API calls.",
    "url": "extension://@janhq/inference-nvidia-extension/dist/index.js"
  },
  "@janhq/inference-groq-extension": {

@@ -160,7 +160,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "Groq Inference Engine",
    "version": "1.0.1",
    "main": "dist/index.js",
-    "description": "This extension enables fast Groq chat completion API calls",
+    "description": "This extension enables fast Groq chat completion API calls.",
    "url": "extension://@janhq/inference-groq-extension/dist/index.js"
  },
  "@janhq/inference-openai-extension": {

@@ -175,7 +175,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "OpenAI Inference Engine",
    "version": "1.0.2",
    "main": "dist/index.js",
-    "description": "This extension enables OpenAI chat completion API calls",
+    "description": "This extension enables OpenAI chat completion API calls.",
    "url": "extension://@janhq/inference-openai-extension/dist/index.js"
  },
  "@janhq/inference-cohere-extension": {

@@ -190,7 +190,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "Cohere Inference Engine",
    "version": "1.0.0",
    "main": "dist/index.js",
-    "description": "This extension enables Cohere chat completion API calls",
+    "description": "This extension enables Cohere chat completion API calls.",
    "url": "extension://@janhq/inference-cohere-extension/dist/index.js"
  },
  "@janhq/model-extension": {

@@ -205,7 +205,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "Model Management",
    "version": "1.0.33",
    "main": "dist/index.js",
-    "description": "Model Management Extension provides model exploration and seamless downloads",
+    "description": "Model Management Extension provides model exploration and seamless downloads.",
    "url": "extension://@janhq/model-extension/dist/index.js"
  },
  "@janhq/monitoring-extension": {

@@ -220,7 +220,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "System Monitoring",
    "version": "1.0.10",
    "main": "dist/index.js",
-    "description": "This extension provides system health and OS level data",
+    "description": "This extension provides system health and OS level data.",
    "url": "extension://@janhq/monitoring-extension/dist/index.js"
  },
  "@janhq/assistant-extension": {

@@ -235,7 +235,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "Jan Assistant",
    "version": "1.0.1",
    "main": "dist/index.js",
-    "description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models",
+    "description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models.",
    "url": "extension://@janhq/assistant-extension/dist/index.js"
  },
  "@janhq/tensorrt-llm-extension": {
@@ -47,8 +47,8 @@ To add a new remote engine:
|-------|-------------|----------|
| Engine Name | Name for your engine (e.g., "OpenAI", "Claude") | ✓ |
| API URL | The base URL of the provider's API | ✓ |
-| API Key | Your authentication key from the provider | ✓ |
-| Model List URL | URL for fetching available models | |
+| API Key | Your authentication key to activate this engine | ✓ |
+| Model List URL | The endpoint URL to fetch available models |
| API Key Template | Custom authorization header format | |
| Request Format Conversion | Function to convert Jan's request format to provider's format | |
| Response Format Conversion | Function to convert provider's response format to Jan's format | |
@@ -28,6 +28,7 @@ import { setupReactDevTool } from './utils/dev'
import { trayManager } from './managers/tray'
import { logSystemInfo } from './utils/system'
import { registerGlobalShortcuts } from './utils/shortcut'
+import { registerLogger } from './utils/logger'

const preloadPath = join(__dirname, 'preload.js')
const rendererPath = join(__dirname, '..', 'renderer')

@@ -79,6 +80,7 @@ app
  })
  .then(setupCore)
  .then(createUserSpace)
+  .then(registerLogger)
  .then(migrate)
  .then(setupExtensions)
  .then(setupMenu)
@@ -1,6 +1,6 @@
{
  "name": "jan",
-  "version": "0.1.4",
+  "version": "0.1.1737985524",
  "main": "./build/main.js",
  "author": "Jan <service@jan.ai>",
  "license": "MIT",
@@ -1,16 +1,28 @@
-import fs from 'fs'
+import {
+  createWriteStream,
+  existsSync,
+  mkdirSync,
+  readdir,
+  stat,
+  unlink,
+  writeFileSync,
+} from 'fs'
import util from 'util'
import {
  getAppConfigurations,
  getJanDataFolderPath,
  Logger,
  LoggerManager,
} from '@janhq/core/node'
import path, { join } from 'path'

-export class FileLogger extends Logger {
+/**
+ * File Logger
+ */
+export class FileLogger implements Logger {
  name = 'file'
  logCleaningInterval: number = 120000
-  timeout: NodeJS.Timeout | null = null
+  timeout: NodeJS.Timeout | undefined
  appLogPath: string = './'
  logEnabled: boolean = true

@@ -18,14 +30,13 @@ export class FileLogger extends Logger {
    logEnabled: boolean = true,
    logCleaningInterval: number = 120000
  ) {
-    super()
    this.logEnabled = logEnabled
    if (logCleaningInterval) this.logCleaningInterval = logCleaningInterval

    const appConfigurations = getAppConfigurations()
    const logFolderPath = join(appConfigurations.data_folder, 'logs')
-    if (!fs.existsSync(logFolderPath)) {
-      fs.mkdirSync(logFolderPath, { recursive: true })
+    if (!existsSync(logFolderPath)) {
+      mkdirSync(logFolderPath, { recursive: true })
    }

    this.appLogPath = join(logFolderPath, 'app.log')

@@ -69,8 +80,8 @@ export class FileLogger extends Logger {
    const logDirectory = path.join(getJanDataFolderPath(), 'logs')
    // Perform log cleaning
    const currentDate = new Date()
-    if (fs.existsSync(logDirectory))
-      fs.readdir(logDirectory, (err, files) => {
+    if (existsSync(logDirectory))
+      readdir(logDirectory, (err, files) => {
        if (err) {
          console.error('Error reading log directory:', err)
          return

@@ -78,7 +89,7 @@ export class FileLogger extends Logger {

        files.forEach((file) => {
          const filePath = path.join(logDirectory, file)
-          fs.stat(filePath, (err, stats) => {
+          stat(filePath, (err, stats) => {
            if (err) {
              console.error('Error getting file stats:', err)
              return

@@ -86,7 +97,7 @@ export class FileLogger extends Logger {

            // Check size
            if (stats.size > size) {
-              fs.unlink(filePath, (err) => {
+              unlink(filePath, (err) => {
                if (err) {
                  console.error('Error deleting log file:', err)
                  return

@@ -103,7 +114,7 @@ export class FileLogger extends Logger {
              (1000 * 3600 * 24)
            )
            if (daysDifference > days) {
-              fs.unlink(filePath, (err) => {
+              unlink(filePath, (err) => {
                if (err) {
                  console.error('Error deleting log file:', err)
                  return

@@ -124,15 +135,20 @@ export class FileLogger extends Logger {
  }
}

+/**
+ * Write log function implementation
+ * @param message
+ * @param logPath
+ */
const writeLog = (message: string, logPath: string) => {
-  if (!fs.existsSync(logPath)) {
+  if (!existsSync(logPath)) {
    const logDirectory = path.join(getJanDataFolderPath(), 'logs')
-    if (!fs.existsSync(logDirectory)) {
-      fs.mkdirSync(logDirectory)
+    if (!existsSync(logDirectory)) {
+      mkdirSync(logDirectory)
    }
-    fs.writeFileSync(logPath, message)
+    writeFileSync(logPath, message)
  } else {
-    const logFile = fs.createWriteStream(logPath, {
+    const logFile = createWriteStream(logPath, {
      flags: 'a',
    })
    logFile.write(util.format(message) + '\n')

@@ -140,3 +156,12 @@ const writeLog = (message: string, logPath: string) => {
    console.debug(message)
  }
}

+/**
+ * Register logger for global application logging
+ */
+export const registerLogger = () => {
+  const logger = new FileLogger()
+  LoggerManager.instance().register(logger)
+  logger.cleanLogs()
+}
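registerLogger gives callers a one-line way to install file logging: it builds a FileLogger, registers it with the LoggerManager singleton, and kicks off log cleaning; the main.ts hunk earlier wires it into the startup promise chain. A sketch of constructing the logger directly with the constructor parameters shown above (the argument values are assumptions):

const quietLogger = new FileLogger(/* logEnabled */ false, /* logCleaningInterval */ 60000)
LoggerManager.instance().register(quietLogger)
quietLogger.cleanLogs()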
@@ -6,6 +6,8 @@ import groq from './resources/groq.json' with { type: 'json' }
import martian from './resources/martian.json' with { type: 'json' }
import mistral from './resources/mistral.json' with { type: 'json' }
import nvidia from './resources/nvidia.json' with { type: 'json' }
+import deepseek from './resources/deepseek.json' with { type: 'json' }
+import googleGemini from './resources/google_gemini.json' with { type: 'json' }

import anthropicModels from './models/anthropic.json' with { type: 'json' }
import cohereModels from './models/cohere.json' with { type: 'json' }

@@ -15,6 +17,8 @@ import groqModels from './models/groq.json' with { type: 'json' }
import martianModels from './models/martian.json' with { type: 'json' }
import mistralModels from './models/mistral.json' with { type: 'json' }
import nvidiaModels from './models/nvidia.json' with { type: 'json' }
+import deepseekModels from './models/deepseek.json' with { type: 'json' }
+import googleGeminiModels from './models/google_gemini.json' with { type: 'json' }

const engines = [
  anthropic,

@@ -25,6 +29,8 @@ const engines = [
  mistral,
  martian,
  nvidia,
+  deepseek,
+  googleGemini,
]
const models = [
  ...anthropicModels,

@@ -35,5 +41,7 @@ const models = [
  ...mistralModels,
  ...martianModels,
  ...nvidiaModels,
+  ...deepseekModels,
+  ...googleGeminiModels,
]
export { engines, models }
@@ -8,6 +8,7 @@
    "inference_params": {
      "max_tokens": 4096,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "stream": true
    },
    "engine": "anthropic"

@@ -21,6 +22,7 @@
    "inference_params": {
      "max_tokens": 8192,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "stream": true
    },
    "engine": "anthropic"

@@ -34,6 +36,7 @@
    "inference_params": {
      "max_tokens": 8192,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "stream": true
    },
    "engine": "anthropic"
@@ -8,6 +8,7 @@
    "inference_params": {
      "max_tokens": 4096,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "stream": false
    },
    "engine": "cohere"

@@ -21,6 +22,7 @@
    "inference_params": {
      "max_tokens": 4096,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "stream": false
    },
    "engine": "cohere"
extensions/engine-management-extension/models/deepseek.json (new file)

@@ -0,0 +1,28 @@
+[
+  {
+    "model": "deepseek-chat",
+    "object": "model",
+    "name": "DeepSeek Chat",
+    "version": "1.0",
+    "description": "The deepseek-chat model has been upgraded to DeepSeek-V3. deepseek-reasoner points to the new model DeepSeek-R1",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "deepseek"
+  },
+  {
+    "model": "deepseek-reasoner",
+    "object": "model",
+    "name": "DeepSeek R1",
+    "version": "1.0",
+    "description": "CoT (Chain of Thought) is the reasoning content deepseek-reasoner gives before output the final answer. For details, please refer to Reasoning Model.",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "deepseek"
+  }
+]
@@ -0,0 +1,67 @@
+[
+  {
+    "model": "gemini-2.0-flash",
+    "object": "model",
+    "name": "Gemini 2.0 Flash",
+    "version": "1.0",
+    "description": "Gemini 2.0 Flash delivers next-gen features and improved capabilities, including superior speed, native tool use, multimodal generation, and a 1M token context window.",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "google_gemini"
+  },
+  {
+    "model": "gemini-2.0-flash-lite-preview",
+    "object": "model",
+    "name": "Gemini 2.0 Flash-Lite Preview",
+    "version": "1.0",
+    "description": "A Gemini 2.0 Flash model optimized for cost efficiency and low latency.",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "google_gemini"
+  },
+  {
+    "model": "gemini-1.5-flash",
+    "object": "model",
+    "name": "Gemini 1.5 Flash",
+    "version": "1.0",
+    "description": "Gemini 1.5 Flash is a fast and versatile multimodal model for scaling across diverse tasks.",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "google_gemini"
+  },
+  {
+    "model": "gemini-1.5-flash-8b",
+    "object": "model",
+    "name": "Gemini 1.5 Flash-8B",
+    "version": "1.0",
+    "description": "Gemini 1.5 Flash-8B is a small model designed for lower intelligence tasks.",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "google_gemini"
+  },
+  {
+    "model": "gemini-1.5-pro",
+    "object": "model",
+    "name": "Gemini 1.5 Pro",
+    "version": "1.0",
+    "description": "Gemini 1.5 Pro is a mid-size multimodal model that is optimized for a wide-range of reasoning tasks. 1.5 Pro can process large amounts of data at once, including 2 hours of video, 19 hours of audio, codebases with 60,000 lines of code, or 2,000 pages of text. ",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "google_gemini"
+  }
+]
@@ -8,6 +8,7 @@
    "inference_params": {
      "max_tokens": 32000,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "top_p": 0.95,
      "stream": true
    },

@@ -22,6 +23,7 @@
    "inference_params": {
      "max_tokens": 32000,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "top_p": 0.95,
      "stream": true
    },

@@ -36,6 +38,7 @@
    "inference_params": {
      "max_tokens": 32000,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "top_p": 0.95,
      "stream": true
    },
@@ -8,6 +8,7 @@
    "inference_params": {
      "max_tokens": 1024,
      "temperature": 0.3,
+      "max_temperature": 1.0,
      "top_p": 1,
      "stream": false,
      "frequency_penalty": 0,
@@ -79,12 +79,7 @@
    "description": "OpenAI o1 is a new model with complex reasoning",
    "format": "api",
    "inference_params": {
-      "max_tokens": 100000,
-      "temperature": 1,
-      "top_p": 1,
-      "stream": true,
-      "frequency_penalty": 0,
-      "presence_penalty": 0
+      "max_tokens": 100000
    },
    "engine": "openai"
  },

@@ -97,11 +92,7 @@
    "format": "api",
    "inference_params": {
      "max_tokens": 32768,
-      "temperature": 1,
-      "top_p": 1,
-      "stream": true,
-      "frequency_penalty": 0,
-      "presence_penalty": 0
+      "stream": true
    },
    "engine": "openai"
  },

@@ -114,11 +105,20 @@
    "format": "api",
    "inference_params": {
      "max_tokens": 65536,
-      "temperature": 1,
-      "top_p": 1,
-      "stream": true,
-      "frequency_penalty": 0,
-      "presence_penalty": 0
+      "stream": true
    },
    "engine": "openai"
  },
+  {
+    "model": "o3-mini",
+    "object": "model",
+    "name": "OpenAI o3-mini",
+    "version": "1.0",
+    "description": "OpenAI most recent reasoning model, providing high intelligence at the same cost and latency targets of o1-mini.",
+    "format": "api",
+    "inference_params": {
+      "max_tokens": 100000,
+      "stream": true
+    },
+    "engine": "openai"
+  }
@@ -1,16 +1,91 @@
[
  {
-    "model": "open-router-auto",
+    "model": "deepseek/deepseek-r1:free",
    "object": "model",
-    "name": "OpenRouter",
+    "name": "DeepSeek: R1",
    "version": "1.0",
    "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
    "inference_params": {
      "max_tokens": 128000,
      "temperature": 0.7,
      "top_p": 0.95,
      "frequency_penalty": 0,
-      "presence_penalty": 0
+      "presence_penalty": 0,
+      "stream": true
    },
    "engine": "openrouter"
  },
+  {
+    "model": "deepseek/deepseek-r1-distill-llama-70b:free",
+    "object": "model",
+    "name": "DeepSeek: R1 Distill Llama 70B",
+    "version": "1.0",
+    "description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+    "inference_params": {
+      "temperature": 0.7,
+      "top_p": 0.95,
+      "frequency_penalty": 0,
+      "presence_penalty": 0,
+      "stream": true
+    },
+    "engine": "openrouter"
+  },
+  {
+    "model": "deepseek/deepseek-r1-distill-llama-70b:free",
+    "object": "model",
+    "name": "DeepSeek: R1 Distill Llama 70B",
+    "version": "1.0",
+    "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+    "inference_params": {
+      "temperature": 0.7,
+      "top_p": 0.95,
+      "frequency_penalty": 0,
+      "presence_penalty": 0,
+      "stream": true
+    },
+    "engine": "openrouter"
+  },
+  {
+    "model": "meta-llama/llama-3.1-405b-instruct:free",
+    "object": "model",
+    "name": "Meta: Llama 3.1 405B Instruct",
+    "version": "1.0",
+    "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+    "inference_params": {
+      "temperature": 0.7,
+      "top_p": 0.95,
+      "frequency_penalty": 0,
+      "presence_penalty": 0,
+      "stream": true
+    },
+    "engine": "openrouter"
+  },
+  {
+    "model": "qwen/qwen-vl-plus:free",
+    "object": "model",
+    "name": "Qwen: Qwen VL Plus",
+    "version": "1.0",
+    "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+    "inference_params": {
+      "temperature": 0.7,
+      "top_p": 0.95,
+      "frequency_penalty": 0,
+      "presence_penalty": 0,
+      "stream": true
+    },
+    "engine": "openrouter"
+  },
+  {
+    "model": "qwen/qwen2.5-vl-72b-instruct:free",
+    "object": "model",
+    "name": "Qwen: Qwen2.5 VL 72B Instruct",
+    "version": "1.0",
+    "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+    "inference_params": {
+      "temperature": 0.7,
+      "top_p": 0.95,
+      "frequency_penalty": 0,
+      "presence_penalty": 0,
+      "stream": true
+    },
+    "engine": "openrouter"
+  }
@@ -29,12 +29,10 @@
  },
  "dependencies": {
    "@janhq/core": "../../core/package.tgz",
-    "cpu-instructions": "^0.0.13",
    "ky": "^1.7.2",
    "p-queue": "^8.0.1"
  },
  "bundledDependencies": [
-    "cpu-instructions",
    "@janhq/core"
  ],
  "engines": {
@ -1,5 +1,5 @@
|
||||
{
|
||||
"id": "@janhq/inference-anthropic-extension",
|
||||
"id": "anthropic",
|
||||
"type": "remote",
|
||||
"engine": "anthropic",
|
||||
"url": "https://console.anthropic.com/settings/keys",
|
||||
@ -10,13 +10,14 @@
|
||||
"transform_req": {
|
||||
"chat_completions": {
|
||||
"url": "https://api.anthropic.com/v1/messages",
|
||||
"template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": \"{{ input_request.messages.0.content }}\", \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": \"{{ message.role}}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
|
||||
"template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": {{ tojson(input_request.messages.0.content) }}, \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"metadata\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
|
||||
}
|
||||
},
|
||||
"transform_resp": {
|
||||
"chat_completions": {
|
||||
"template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": \"{{ input_request.model }}\", \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"{{ input_request.role }}\", \"content\": {% if input_request.content and input_request.content.0.type == \"text\" %} \"{{input_request.content.0.text}}\" {% else %} null {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.stop_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.usage.input_tokens }}, \"completion_tokens\": {{ input_request.usage.output_tokens }}, \"total_tokens\": {{ input_request.usage.input_tokens + input_request.usage.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 }, \"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}"
}
}
"template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {{tojson(input_request)}} {% endif %}"
}
},
"explore_models_url": "https://docs.anthropic.com/en/docs/about-claude/models"
}
}
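The substantive change in both templates above is swapping hand-quoted interpolation ("{{ message.content }}" wrapped in literal quotes) for tojson(...), which JSON-encodes the value and escapes embedded quotes and newlines. A minimal TypeScript sketch of why that matters, assuming tojson behaves like JSON.stringify:

// Hand-quoting breaks as soon as the content contains a quote or newline:
const content = 'He said "hi"\nbye'
const broken = `"content": "${content}"`             // invalid JSON fragment
const safe = `"content": ${JSON.stringify(content)}` // properly escaped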
@ -1,5 +1,5 @@
{
"id": "@janhq/inference-cohere-extension",
"id": "cohere",
"type": "remote",
"engine": "cohere",
"url": "https://dashboard.cohere.com/api-keys",
@ -10,13 +10,14 @@
"transform_req": {
"chat_completions": {
"url": "https://api.cohere.ai/v1/chat",
"template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"preamble\": \"{{ input_request.messages.0.content }}\", {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_first and not loop.is_last %} {\"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}], {% endif %} \"message\": \"{{ last(input_request.messages).content }}\" {% else %} {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_last %} { \"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}],{% endif %}\"message\": \"{{ last(input_request.messages).content }}\" {% endif %}{% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} \"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
"template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"preamble\": {{ tojson(input_request.messages.0.content) }}, {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_first and not loop.is_last %} {\"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}], {% endif %} \"message\": \"{{ last(input_request.messages).content }}\" {% else %} {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_last %} { \"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}],{% endif %}\"message\": \"{{ last(input_request.messages).content }}\" {% endif %}{% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} \"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
"template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.event_type == \"text-generation\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.text }}\" {% else %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.event_type == \"stream-end\" %} \"finish_reason\": \"{{ input_request.finish_reason }}\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.generation_id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": {% if input_request.model %} \"{{ input_request.model }}\" {% else %} \"command-r-plus-08-2024\" {% endif %}, \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"assistant\", \"content\": {% if not input_request.text %} null {% else %} \"{{ input_request.text }}\" {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.finish_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.meta.tokens.input_tokens }}, \"completion_tokens\": {{ input_request.meta.tokens.output_tokens }},\"total_tokens\": {{ input_request.meta.tokens.input_tokens + input_request.meta.tokens.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 },\"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}"
}
}
},
"explore_models_url": "https://docs.cohere.com/v2/docs/models"
}
}
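For reference, the Cohere template above reshapes an OpenAI-style messages array into Cohere's preamble / chatHistory / message fields. A rough TypeScript sketch of that mapping (message shape assumed; the real renderer also skips chatHistory when fewer than three messages are present):

type Msg = { role: string; content: string }

function toCohereChat(messages: Msg[]) {
  const hasSystem = messages[0]?.role === 'system'
  // History is everything between the optional system prompt and the last turn
  const history = messages.slice(hasSystem ? 1 : 0, -1).map((m) => ({
    role: m.role === 'user' ? 'USER' : 'CHATBOT',
    content: m.content,
  }))
  return {
    ...(hasSystem && { preamble: messages[0].content }),
    ...(history.length > 0 && { chatHistory: history }),
    message: messages[messages.length - 1].content, // the current user turn
  }
}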
@ -0,0 +1,23 @@
{
"id": "deepseek",
"type": "remote",
"engine": "deepseek",
"url": "https://platform.deepseek.com/api_keys",
"api_key": "",
"metadata": {
"get_models_url": "https://api.deepseek.com/models",
"header_template": "Authorization: Bearer {{api_key}}",
"transform_req": {
"chat_completions": {
"url": "https://api.deepseek.com/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://api-docs.deepseek.com/quick_start/pricing"
}
}
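The request template above is effectively a key whitelist: it walks the incoming request and copies only OpenAI-compatible fields, JSON-encoding each value. A condensed TypeScript equivalent (field list abridged; tojson is assumed to behave like JSON.stringify):

const ALLOWED = new Set([
  'messages', 'model', 'temperature', 'max_tokens', 'stream',
  'top_p', 'stop', 'seed', 'tools', 'tool_choice', // ...abridged
])

function transformRequest(input: Record<string, unknown>): string {
  // Keep only whitelisted keys, dropping anything the provider would reject
  const body = Object.fromEntries(
    Object.entries(input).filter(([key]) => ALLOWED.has(key))
  )
  return JSON.stringify(body)
}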
@ -0,0 +1,23 @@
{
"id": "google_gemini",
"type": "remote",
"engine": "google_gemini",
"url": "https://aistudio.google.com/apikey",
"api_key": "",
"metadata": {
"get_models_url": "https://generativelanguage.googleapis.com/v1beta/models",
"header_template": "Authorization: Bearer {{api_key}}",
"transform_req": {
"chat_completions": {
"url": "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://ai.google.dev/gemini-api/docs/models/gemini"
}
}
@ -1,5 +1,5 @@
{
"id": "@janhq/inference-groq-extension",
"id": "groq",
"type": "remote",
"engine": "groq",
"url": "https://console.groq.com/keys",
@ -15,8 +15,9 @@
},
"transform_resp": {
"chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
}
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://console.groq.com/docs/models"
}
}
@ -1,5 +1,5 @@
{
"id": "@janhq/inference-martian-extension",
"id": "martian",
"type": "remote",
"engine": "martian",
"url": "https://withmartian.com/dashboard",
@ -15,8 +15,9 @@
},
"transform_resp": {
"chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
}
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://withmartian.github.io/llm-adapters/"
}
}
@ -1,5 +1,5 @@
{
"id": "@janhq/inference-mistral-extension",
"id": "mistral",
"type": "remote",
"engine": "mistral",
"url": "https://console.mistral.ai/api-keys/",
@ -17,6 +17,7 @@
"chat_completions": {
"template": "{{tojson(input_request)}}"
}
}
},
"explore_models_url": "https://docs.mistral.ai/getting-started/models/models_overview/"
}
}
@ -1,5 +1,5 @@
{
"id": "@janhq/inference-nvidia-extension",
"id": "nvidia",
"type": "remote",
"engine": "nvidia",
"url": "https://org.ngc.nvidia.com/setup/personal-keys",
@ -15,8 +15,9 @@
},
"transform_resp": {
"chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
}
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://build.nvidia.com/models"
}
}
@ -1,5 +1,5 @@
{
"id": "@janhq/inference-openai-extension",
"id": "openai",
"type": "remote",
"engine": "openai",
"url": "https://platform.openai.com/account/api-keys",
@ -10,13 +10,14 @@
"transform_req": {
"chat_completions": {
"url": "https://api.openai.com/v1/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or ((input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and (key == \"stop\")) %} {% if not first %} , {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %} , {% endif %} {% endif %} {% endfor %}] {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }"
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or ((input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") and (key == \"stop\")) %} {% if not first %} , {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %} , {% endif %} {% endif %} {% endfor %}] {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% set first = false %} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
}
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://platform.openai.com/docs/models"
}
}
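The updated OpenAI template adds o3 and o3-mini to the reasoning-model special cases. Condensed into TypeScript, the rewrite it performs looks roughly like this (simplified: in the actual template the system-message drop is restricted to the o1 family, while max_completion_tokens applies to o3 as well):

const REASONING_MODELS = new Set(['o1', 'o1-preview', 'o1-mini', 'o3', 'o3-mini'])

type ChatRequest = {
  model: string
  max_tokens?: number
  messages: { role: string; content: string }[]
  [key: string]: unknown
}

function rewriteForReasoningModels(req: ChatRequest) {
  if (!REASONING_MODELS.has(req.model)) return req
  const { max_tokens, messages, ...rest } = req
  return {
    ...rest,
    // these endpoints take max_completion_tokens instead of max_tokens
    ...(max_tokens !== undefined && { max_completion_tokens: max_tokens }),
    // a leading system message is stripped rather than forwarded
    messages: messages[0]?.role === 'system' ? messages.slice(1) : messages,
  }
}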
@ -1,5 +1,5 @@
{
"id": "@janhq/inference-openrouter-extension",
"id": "openrouter",
"type": "remote",
"engine": "openrouter",
"url": "https://openrouter.ai/keys",
@ -10,13 +10,14 @@
"transform_req": {
"chat_completions": {
"url": "https://openrouter.ai/api/v1/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
}
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://openrouter.ai/models"
}
}
@ -13,9 +13,19 @@ export default defineConfig([
NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`),
API_URL: JSON.stringify('http://127.0.0.1:39291'),
SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
PLATFORM: JSON.stringify(process.platform),
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'),
DEFAULT_REMOTE_ENGINES: JSON.stringify(engines),
DEFAULT_REMOTE_MODELS: JSON.stringify(models),
DEFAULT_REQUEST_PAYLOAD_TRANSFORM: JSON.stringify(
`{ {% set first = true %} {% for key, value in input_request %} {% if key == "messages" or key == "model" or key == "temperature" or key == "store" or key == "max_tokens" or key == "stream" or key == "presence_penalty" or key == "metadata" or key == "frequency_penalty" or key == "tools" or key == "tool_choice" or key == "logprobs" or key == "top_logprobs" or key == "logit_bias" or key == "n" or key == "modalities" or key == "prediction" or key == "response_format" or key == "service_tier" or key == "seed" or key == "stop" or key == "stream_options" or key == "top_p" or key == "parallel_tool_calls" or key == "user" %} {% if not first %},{% endif %} "{{ key }}": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }`
),
DEFAULT_RESPONSE_BODY_TRANSFORM: JSON.stringify(
'{{tojson(input_request)}}'
),
DEFAULT_REQUEST_HEADERS_TRANSFORM: JSON.stringify(
'Authorization: Bearer {{api_key}}'
),
},
},
{
@ -29,15 +39,4 @@ export default defineConfig([
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'),
},
},
{
input: 'src/node/cpuInfo.ts',
output: {
format: 'cjs',
file: 'dist/node/cpuInfo.js',
},
external: ['cpu-instructions'],
resolve: {
extensions: ['.ts', '.js', '.svg'],
},
},
])
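These define entries are compile-time constant injection: rolldown textually replaces each identifier with the JSON literal when bundling, and the ambient declarations in the next file keep TypeScript happy. Illustrative consumption:

// After bundling, this compiles down to a plain string literal,
// e.g. 'Authorization: Bearer {{api_key}}'
const headerTemplate: string = DEFAULT_REQUEST_HEADERS_TRANSFORM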
@ -1,7 +1,11 @@
declare const API_URL: string
declare const CORTEX_ENGINE_VERSION: string
declare const PLATFORM: string
declare const SOCKET_URL: string
declare const NODE: string
declare const DEFAULT_REQUEST_PAYLOAD_TRANSFORM: string
declare const DEFAULT_RESPONSE_BODY_TRANSFORM: string
declare const DEFAULT_REQUEST_HEADERS_TRANSFORM: string

declare const DEFAULT_REMOTE_ENGINES: ({
id: string
@ -19,12 +19,16 @@ import ky, { HTTPError } from 'ky'
import PQueue from 'p-queue'
import { EngineError } from './error'
import { getJanDataFolderPath } from '@janhq/core'
import { engineVariant } from './utils'

interface ModelList {
data: Model[]
}
/**
 * JSONEngineManagementExtension is a EngineManagementExtension implementation that provides
 * JanEngineManagementExtension is an EngineManagementExtension implementation that provides
 * functionality for managing engines.
 */
export default class JSONEngineManagementExtension extends EngineManagementExtension {
export default class JanEngineManagementExtension extends EngineManagementExtension {
queue = new PQueue({ concurrency: 1 })

/**
@ -63,13 +67,12 @@ export default class JSONEngineManagementExtension
 * @returns A Promise that resolves to the list of remote models.
 */
async getRemoteModels(name: string): Promise<any> {
return this.queue.add(() =>
ky
return ky
.get(`${API_URL}/v1/models/remote/${name}`)
.json<Model[]>()
.then((e) => e)
.catch(() => [])
) as Promise<Model[]>
.json<ModelList>()
.catch(() => ({
data: [],
})) as Promise<ModelList>
}
/**
@ -138,9 +141,38 @@ export default class JSONEngineManagementExtension
 * Add a new remote engine
 * @returns A Promise that resolves when the engine has been installed.
 */
async addRemoteEngine(engineConfig: EngineConfig) {
async addRemoteEngine(
engineConfig: EngineConfig,
persistModels: boolean = true
) {
// Populate default settings
if (
engineConfig.metadata?.transform_req?.chat_completions &&
!engineConfig.metadata.transform_req.chat_completions.template
)
engineConfig.metadata.transform_req.chat_completions.template =
DEFAULT_REQUEST_PAYLOAD_TRANSFORM

if (
engineConfig.metadata?.transform_resp?.chat_completions &&
!engineConfig.metadata.transform_resp.chat_completions?.template
)
engineConfig.metadata.transform_resp.chat_completions.template =
DEFAULT_RESPONSE_BODY_TRANSFORM

if (engineConfig.metadata && !engineConfig.metadata?.header_template)
engineConfig.metadata.header_template = DEFAULT_REQUEST_HEADERS_TRANSFORM

return this.queue.add(() =>
ky.post(`${API_URL}/v1/engines`, { json: engineConfig }).then((e) => e)
ky.post(`${API_URL}/v1/engines`, { json: engineConfig }).then((e) => {
if (persistModels && engineConfig.metadata?.get_models_url) {
// Pull /models from remote models endpoint
return this.populateRemoteModels(engineConfig)
.then(() => e)
.catch(() => e)
}
return e
})
) as Promise<{ messages: string }>
}
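A hypothetical call against the new signature, showing the fallback behavior (engine name and URLs below are illustrative, not real providers):

// Both templates and header_template are omitted here, so they are
// filled from the DEFAULT_* transforms before the POST to /v1/engines.
await extension.addRemoteEngine({
  engine: 'my-provider',
  metadata: {
    get_models_url: 'https://api.example.com/models',
    transform_req: { chat_completions: { url: 'https://api.example.com/chat' } },
    transform_resp: { chat_completions: {} },
  },
}) // persistModels defaults to true, so populateRemoteModels runs afterwards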
@ -161,9 +193,25 @@ export default class JSONEngineManagementExtension
 * @param model - Remote model object.
 */
async addRemoteModel(model: Model) {
return this.queue.add(() =>
ky.post(`${API_URL}/v1/models/add`, { json: model }).then((e) => e)
return this.queue
.add(() =>
ky
.post(`${API_URL}/v1/models/add`, {
json: {
inference_params: {
max_tokens: 4096,
temperature: 0.7,
top_p: 0.95,
stream: true,
frequency_penalty: 0,
presence_penalty: 0,
},
...model,
},
})
.then((e) => e)
)
.then(() => {})
}
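Note the spread order: because ...model comes after the inference_params default, a model that ships its own inference_params replaces the whole default object rather than deep-merging with it. A short illustration (values hypothetical):

const defaults = { inference_params: { max_tokens: 4096, stream: true } }
const model = { model: 'some-remote-model', inference_params: { max_tokens: 128000 } }
const payload = { ...defaults, ...model }
// payload.inference_params is { max_tokens: 128000 } - stream is gone,
// since object spread overwrites the key wholesale; it does not deep-merge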
/**
@ -245,11 +293,7 @@ export default class JSONEngineManagementExtension
error instanceof EngineError
) {
const systemInfo = await systemInformation()
const variant = await executeOnMain(
NODE,
'engineVariant',
systemInfo.gpuSetting
)
const variant = await engineVariant(systemInfo.gpuSetting)
await this.setDefaultEngineVariant(InferenceEngine.cortex_llamacpp, {
variant: variant,
version: `${CORTEX_ENGINE_VERSION}`,
@ -293,14 +337,40 @@ export default class JSONEngineManagementExtension
data.api_key = api_key
/// END - Migrate legacy api key settings

await this.addRemoteEngine(data).catch(console.error)
await this.addRemoteEngine(data, false).catch(console.error)
})
)
events.emit(EngineEvent.OnEngineUpdate, {})
DEFAULT_REMOTE_MODELS.forEach(async (data: Model) => {
await this.addRemoteModel(data).catch(() => {})
})
await Promise.all(
DEFAULT_REMOTE_MODELS.map((data: Model) =>
this.addRemoteModel(data).catch(() => {})
)
)
events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
}
}

/**
 * Pulls the models list from the remote provider and persists it
 * @param engineConfig
 * @returns
 */
private populateRemoteModels = async (engineConfig: EngineConfig) => {
return this.getRemoteModels(engineConfig.engine)
.then((models: ModelList) => {
if (models?.data)
Promise.all(
models.data.map((model) =>
this.addRemoteModel({
...model,
engine: engineConfig.engine as InferenceEngine,
model: model.model ?? model.id,
}).catch(console.info)
)
).then(() => {
events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
})
})
.catch(console.info)
}
}
@ -1,27 +0,0 @@
import { cpuInfo } from 'cpu-instructions'

// Check the CPU info and determine the supported instruction set
const info = cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512')
? 'avx512'
: cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX2')
? 'avx2'
: cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX')
? 'avx'
: 'noavx'

// Send the result and wait for confirmation before exiting
new Promise<void>((resolve, reject) => {
// @ts-ignore
process.send(info, (error: Error | null) => {
if (error) {
reject(error)
} else {
resolve()
}
})
})
.then(() => process.exit(0))
.catch((error) => {
console.error('Failed to send info:', error)
process.exit(1)
})
@ -1,7 +1,6 @@
import { describe, expect, it } from '@jest/globals'
import engine from './index'
import { GpuSetting } from '@janhq/core/node'
import { cpuInfo } from 'cpu-instructions'
import { GpuSetting } from '@janhq/core'
import { fork } from 'child_process'

let testSettings: GpuSetting = {
@ -23,22 +22,12 @@ let testSettings: GpuSetting = {
}
const originalPlatform = process.platform

jest.mock('cpu-instructions', () => ({
cpuInfo: {
cpuInfo: jest.fn(),
},
}))
let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock
mockCpuInfo.mockReturnValue([])

jest.mock('@janhq/core/node', () => ({

jest.mock('@janhq/core', () => ({
appResourcePath: () => '.',
log: jest.fn(),
}))
jest.mock('child_process', () => ({
fork: jest.fn(),
}))
const mockFork = fork as jest.Mock

describe('test executable cortex file', () => {
afterAll(function () {
@ -48,14 +37,7 @@ describe('test executable cortex file', () => {
})

it('executes on MacOS', () => {
const mockProcess = {
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
}

Object.defineProperty(process, 'platform', {
value: 'darwin',
})
@ -63,7 +45,7 @@
value: 'arm64',
})

mockFork.mockReturnValue(mockProcess)

expect(engine.engineVariant(testSettings)).resolves.toEqual('mac-arm64')
})

@ -83,7 +65,7 @@
}),
send: jest.fn(),
}
mockFork.mockReturnValue(mockProcess)

Object.defineProperty(process, 'arch', {
value: 'x64',
})
@ -107,7 +89,6 @@
}),
send: jest.fn(),
}
mockFork.mockReturnValue(mockProcess)

expect(engine.engineVariant()).resolves.toEqual('windows-amd64-avx')
})
@ -145,7 +126,6 @@
}),
send: jest.fn(),
}
mockFork.mockReturnValue(mockProcess)

expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-avx2-cuda-11-7'
@ -176,26 +156,11 @@
},
],
}
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
})

expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-noavx-cuda-12-0'
)
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx512')
}
}),
send: jest.fn(),
})

expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-avx2-cuda-12-0'
)
@ -209,14 +174,6 @@
...testSettings,
run_mode: 'cpu',
}
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
})

expect(engine.engineVariant()).resolves.toEqual('linux-amd64-noavx')
})
@ -245,16 +202,6 @@
},
],
}

mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx512')
}
}),
send: jest.fn(),
})

expect(engine.engineVariant(settings)).resolves.toBe(
'linux-amd64-avx2-cuda-11-7'
)
@ -284,14 +231,7 @@
},
],
}
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx2')
}
}),
send: jest.fn(),
})

expect(engine.engineVariant(settings)).resolves.toEqual(
'linux-amd64-avx2-cuda-12-0'
@ -310,15 +250,6 @@

const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})

expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-${instruction}`
)
@ -335,14 +266,7 @@
}
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})

expect(engine.engineVariant(settings)).resolves.toEqual(
`windows-amd64-${instruction}`
)
@ -376,14 +300,7 @@
}
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})

expect(engine.engineVariant(settings)).resolves.toEqual(
`windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
)
@ -417,14 +334,7 @@
],
}
cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})

expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
)
@ -459,14 +369,7 @@
],
}
cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})

expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-vulkan`
)
@ -2,111 +2,10 @@ import * as path from 'path'
import {
appResourcePath,
getJanDataFolderPath,
GpuSetting,
log,
} from '@janhq/core/node'
import { fork } from 'child_process'
import { mkdir, readdir, symlink } from 'fs/promises'

/**
 * The GPU runMode that will be set - either 'vulkan', 'cuda', or empty for cpu.
 * @param settings
 * @returns
 */
const gpuRunMode = (settings?: GpuSetting): string => {
if (process.platform === 'darwin')
// MacOS now has universal binaries
return ''

if (!settings) return ''

return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda'
}

/**
 * The OS & architecture that the current process is running on.
 * @returns win, mac-x64, mac-arm64, or linux
 */
const os = (): string => {
return process.platform === 'win32'
? 'windows-amd64'
: process.platform === 'darwin'
? process.arch === 'arm64'
? 'mac-arm64'
: 'mac-amd64'
: 'linux-amd64'
}

/**
 * The CUDA version that will be set - either '11-7' or '12-0'.
 * @param settings
 * @returns
 */
const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
const isUsingCuda =
settings?.vulkan !== true &&
settings?.run_mode === 'gpu' &&
!os().includes('mac')

if (!isUsingCuda) return undefined
return settings?.cuda?.version === '11' ? '11-7' : '12-0'
}

/**
 * The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
 * @returns
 */
const cpuInstructions = async (): Promise<string> => {
if (process.platform === 'darwin') return ''

const child = fork(path.join(__dirname, './cpuInfo.js')) // Path to the child process file

return new Promise((resolve, reject) => {
child.on('message', (cpuInfo?: string) => {
resolve(cpuInfo ?? 'noavx')
child.kill() // Kill the child process after receiving the result
})

child.on('error', (err) => {
resolve('noavx')
child.kill()
})

child.on('exit', (code) => {
if (code !== 0) {
resolve('noavx')
child.kill()
}
})
})
}

/**
 * Find which variant to run based on the current platform.
 */
const engineVariant = async (gpuSetting?: GpuSetting): Promise<string> => {
const cpuInstruction = await cpuInstructions()
log(`[CORTEX]: CPU instruction: ${cpuInstruction}`)
let engineVariant = [
os(),
gpuSetting?.vulkan
? 'vulkan'
: gpuRunMode(gpuSetting) !== 'cuda'
? // CPU mode - support all variants
cpuInstruction
: // GPU mode - packaged CUDA variants of avx2 and noavx
cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
? 'avx2'
: 'noavx',
gpuRunMode(gpuSetting),
cudaVersion(gpuSetting),
]
.filter((e) => !!e)
.join('-')

log(`[CORTEX]: Engine variant: ${engineVariant}`)
return engineVariant
}

/**
 * Create symlink to each variant for the default bundled version
@ -148,6 +47,5 @@ const symlinkEngines = async () => {
}

export default {
engineVariant,
symlinkEngines,
}
86
extensions/engine-management-extension/src/utils.ts
Normal file
@ -0,0 +1,86 @@
import { GpuSetting, log } from '@janhq/core'

/**
 * The GPU runMode that will be set - either 'vulkan', 'cuda', or empty for cpu.
 * @param settings
 * @returns
 */

const gpuRunMode = (settings?: GpuSetting): string => {
return settings.gpus?.some(
(gpu) =>
gpu.activated === true &&
gpu.additional_information &&
gpu.additional_information.driver_version
)
? 'cuda'
: ''
}

/**
 * The OS & architecture that the current process is running on.
 * @returns win, mac-x64, mac-arm64, or linux
 */
const os = (settings?: GpuSetting): string => {
return PLATFORM === 'win32'
? 'windows-amd64'
: PLATFORM === 'darwin'
? settings?.cpu?.arch === 'arm64'
? 'mac-arm64'
: 'mac-amd64'
: 'linux-amd64'
}

/**
 * The CUDA version that will be set - either '11-7' or '12-0'.
 * @param settings
 * @returns
 */
const cudaVersion = (settings?: GpuSetting): '12-0' | '11-7' | undefined => {
const isUsingCuda =
settings?.vulkan !== true &&
settings?.gpus?.some((gpu) => (gpu.activated === true ? 'gpu' : 'cpu')) &&
!os().includes('mac')

if (!isUsingCuda) return undefined
// return settings?.cuda?.version === '11' ? '11-7' : '12-0'
return settings.gpus?.some((gpu) => gpu.version.includes('12'))
? '12-0'
: '11-7'
}

/**
 * The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
 * @returns
 */

/**
 * Find which variant to run based on the current platform.
 */
export const engineVariant = async (
gpuSetting?: GpuSetting
): Promise<string> => {
const platform = os(gpuSetting)

// There is no need to append the variant extension for mac
if (platform.startsWith('mac')) return platform

let engineVariant =
gpuSetting?.vulkan || gpuSetting.gpus.some((e) => !e.additional_information)
? [platform, 'vulkan']
: [
platform,
gpuRunMode(gpuSetting) === 'cuda' &&
(gpuSetting.cpu.instructions.includes('avx2') ||
gpuSetting.cpu.instructions.includes('avx512'))
? 'avx2'
: 'noavx',
gpuRunMode(gpuSetting),
cudaVersion(gpuSetting),
].filter(Boolean) // Remove any falsy values

let engineVariantString = engineVariant.join('-')

log(`[CORTEX]: Engine variant: ${engineVariantString}`)
return engineVariantString
}
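Illustrative inputs and outputs for the new engineVariant, consistent with the tests earlier in this diff (GpuSetting literals abridged; PLATFORM is the injected build constant):

// On a darwin build: the platform alone is the variant
await engineVariant({ cpu: { arch: 'arm64', instructions: [] }, gpus: [] })
// -> 'mac-arm64'

// On a win32 build with an activated NVIDIA GPU and an AVX2-capable CPU:
await engineVariant({
  vulkan: false,
  cpu: { arch: 'x64', instructions: ['avx2'] },
  gpus: [
    {
      activated: true,
      version: '12.0',
      additional_information: { driver_version: '535.0' },
    },
  ],
})
// -> 'windows-amd64-avx2-cuda-12-0'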
5
extensions/hardware-management-extension/jest.config.js
Normal file
@ -0,0 +1,5 @@
/** @type {import('ts-jest').JestConfigWithTsJest} */
module.exports = {
preset: 'ts-jest',
testEnvironment: 'node',
}
48
extensions/hardware-management-extension/package.json
Normal file
@ -0,0 +1,48 @@
{
"name": "@janhq/hardware-management-extension",
"productName": "Hardware Management",
"version": "1.0.0",
"description": "Manages Better Hardware settings.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>",
"license": "MIT",
"scripts": {
"test": "jest",
"build": "rolldown -c rolldown.config.mjs",
"codesign:darwin": "../../.github/scripts/auto-sign.sh",
"codesign:win32:linux": "echo 'No codesigning required'",
"codesign": "run-script-os",
"build:publish": "rimraf *.tgz --glob || true && yarn build && yarn codesign && npm pack && cpx *.tgz ../../pre-install"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/module.js"
},
"devDependencies": {
"cpx": "^1.5.0",
"rimraf": "^3.0.2",
"rolldown": "^1.0.0-beta.1",
"run-script-os": "^1.1.6",
"ts-loader": "^9.5.0",
"typescript": "^5.3.3"
},
"dependencies": {
"@janhq/core": "../../core/package.tgz",
"cpu-instructions": "^0.0.13",
"ky": "^1.7.2",
"p-queue": "^8.0.1"
},
"bundledDependencies": [
"cpu-instructions",
"@janhq/core"
],
"hardwares": {
"node": ">=18.0.0"
},
"files": [
"dist/*",
"package.json",
"README.md"
]
}
17
extensions/hardware-management-extension/rolldown.config.mjs
Normal file
@ -0,0 +1,17 @@
import { defineConfig } from 'rolldown'
import pkgJson from './package.json' with { type: 'json' }

export default defineConfig([
{
input: 'src/index.ts',
output: {
format: 'esm',
file: 'dist/index.js',
},
define: {
NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`),
API_URL: JSON.stringify('http://127.0.0.1:39291'),
SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
},
},
])
12
extensions/hardware-management-extension/src/@types/global.d.ts
vendored
Normal file
@ -0,0 +1,12 @@
declare const API_URL: string
declare const SOCKET_URL: string
declare const NODE: string

interface Core {
api: APIFunctions
events: EventEmitter
}
interface Window {
core?: Core | undefined
electronAPI?: any | undefined
}
67
extensions/hardware-management-extension/src/index.ts
Normal file
@ -0,0 +1,67 @@
import {
executeOnMain,
HardwareManagementExtension,
HardwareInformation,
} from '@janhq/core'
import ky from 'ky'
import PQueue from 'p-queue'

/**
 * JSONHardwareManagementExtension is a HardwareManagementExtension implementation that provides
 * functionality for managing hardware.
 */
export default class JSONHardwareManagementExtension extends HardwareManagementExtension {
queue = new PQueue({ concurrency: 1 })

/**
 * Called when the extension is loaded.
 */
async onLoad() {
// Run Healthcheck
this.queue.add(() => this.healthz())
}

/**
 * Called when the extension is unloaded.
 */
onUnload() {}

/**
 * Do health check on cortex.cpp
 * @returns
 */
async healthz(): Promise<void> {
return ky
.get(`${API_URL}/healthz`, {
retry: { limit: 20, delay: () => 500, methods: ['get'] },
})
.then(() => {})
}

/**
 * @returns A Promise that resolves to an object of hardware.
 */
async getHardware(): Promise<HardwareInformation> {
return this.queue.add(() =>
ky
.get(`${API_URL}/v1/hardware`)
.json<HardwareInformation>()
.then((e) => e)
) as Promise<HardwareInformation>
}

/**
 * @returns A Promise that resolves with the GPU activation result.
 */
async setAvtiveGpu(data: { gpus: number[] }): Promise<{
message: string
activated_gpus: number[]
}> {
return this.queue.add(() =>
ky.post(`${API_URL}/v1/hardware/activate`, { json: data }).then((e) => e)
) as Promise<{
message: string
activated_gpus: number[]
}>
}
}
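Illustrative usage of the new extension's surface (caller and values hypothetical; note that setAvtiveGpu keeps its spelling from the source):

const hw = await hardwareExtension.getHardware()
// hw describes the CPU, RAM, and GPUs reported by cortex.cpp

await hardwareExtension.setAvtiveGpu({ gpus: [0] })
// -> { message: '...', activated_gpus: [0] }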
@ -8,7 +8,9 @@
"forceConsistentCasingInFileNames": true,
"strict": false,
"skipLibCheck": true,
"rootDir": "./src"
"rootDir": "./src",
"resolveJsonModule": true
},
"include": ["./src"]
"include": ["./src"],
"exclude": ["src/**/*.test.ts", "rolldown.config.mjs"]
}

@ -1 +1 @@
1.0.9-rc7
1.0.10

@ -1,7 +1,7 @@
{
"name": "@janhq/inference-cortex-extension",
"productName": "Cortex Inference Engine",
"version": "1.0.24",
"version": "1.0.25",
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",

@ -76,7 +76,7 @@
},
{
"key": "use_mmap",
"title": "MMAP",
"title": "mmap",
"description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.",
"controllerType": "checkbox",
"controllerProps": {
@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf"
}
],
"id": "deepseek-r1-distill-llama-70b",
"object": "model",
"name": "DeepSeek R1 Distill Llama 70B Q4",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<|User|> {prompt} <|Assistant|>",
"llama_model_path": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf",
"ngl": 81
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["70B", "Featured"],
"size": 42500000000
},
"engine": "llama-cpp"
}

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf"
}
],
"id": "deepseek-r1-distill-llama-8b",
"object": "model",
"name": "DeepSeek R1 Distill Llama 8B Q5",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<|User|> {prompt} <|Assistant|>",
"llama_model_path": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf",
"ngl": 33
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["8B", "Featured"],
"size": 5730000000
},
"engine": "llama-cpp"
}

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-1.5b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 1.5B Q5",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<|User|> {prompt} <|Assistant|>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf",
"ngl": 29
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["1.5B", "Featured"],
"size": 1290000000
},
"engine": "llama-cpp"
}

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-14b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 14B Q4",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<|User|> {prompt} <|Assistant|>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf",
"ngl": 49
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["14B", "Featured"],
"size": 8990000000
},
"engine": "llama-cpp"
}

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-32b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 32B Q4",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<|User|> {prompt} <|Assistant|>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf",
"ngl": 65
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["32B", "Featured"],
"size": 19900000000
},
"engine": "llama-cpp"
}

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-7b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 7B Q5",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<|User|> {prompt} <|Assistant|>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf",
"ngl": 29
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["7B", "Featured"],
"size": 5440000000
},
"engine": "llama-cpp"
}
@ -22,19 +22,13 @@
"top_p": 0.95,
"stream": true,
"max_tokens": 8192,
"stop": [
"<|end_of_text|>",
"<|eot_id|>",
"<|eom_id|>"
],
"stop": ["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MetaAI",
"tags": [
"8B", "Featured"
],
"tags": ["8B", "Featured"],
"size": 4920000000
},
"engine": "llama-cpp"
@@ -49,6 +49,13 @@ import qwen2514bJson from './resources/models/qwen2.5-14b-instruct/model.json' w
import qwen2532bJson from './resources/models/qwen2.5-32b-instruct/model.json' with { type: 'json' }
import qwen2572bJson from './resources/models/qwen2.5-72b-instruct/model.json' with { type: 'json' }

import deepseekR1DistillQwen_1_5b from './resources/models/deepseek-r1-distill-qwen-1.5b/model.json' with { type: 'json' }
import deepseekR1DistillQwen_7b from './resources/models/deepseek-r1-distill-qwen-7b/model.json' with { type: 'json' }
import deepseekR1DistillQwen_14b from './resources/models/deepseek-r1-distill-qwen-14b/model.json' with { type: 'json' }
import deepseekR1DistillQwen_32b from './resources/models/deepseek-r1-distill-qwen-32b/model.json' with { type: 'json' }
import deepseekR1DistillLlama_8b from './resources/models/deepseek-r1-distill-llama-8b/model.json' with { type: 'json' }
import deepseekR1DistillLlama_70b from './resources/models/deepseek-r1-distill-llama-70b/model.json' with { type: 'json' }

export default defineConfig([
{
input: 'src/index.ts',
@@ -106,6 +113,12 @@ export default defineConfig([
qwen2514bJson,
qwen2532bJson,
qwen2572bJson,
deepseekR1DistillQwen_1_5b,
deepseekR1DistillQwen_7b,
deepseekR1DistillQwen_14b,
deepseekR1DistillQwen_32b,
deepseekR1DistillLlama_8b,
deepseekR1DistillLlama_70b,
]),
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
SETTINGS: JSON.stringify(defaultSettingJson),

@@ -112,8 +112,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number

// Run the process watchdog
const systemInfo = await systemInformation()
this.queue.add(() => executeOnMain(NODE, 'run', systemInfo))
// const systemInfo = await systemInformation()
this.queue.add(() => executeOnMain(NODE, 'run'))
this.queue.add(() => this.healthz())
this.subscribeToEvents()


@@ -16,15 +16,20 @@ let watchdog: ProcessWatchdog | undefined = undefined
* Spawns a Nitro subprocess.
* @returns A promise that resolves when the Nitro subprocess is started.
*/
function run(systemInfo?: SystemInformation): Promise<any> {
function run(): Promise<any> {
log(`[CORTEX]:: Spawning cortex subprocess...`)

return new Promise<void>(async (resolve, reject) => {
let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? ''
let binaryName = `cortex-server${process.platform === 'win32' ? '.exe' : ''}`
// let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? ''
let binaryName = `cortex-server${
process.platform === 'win32' ? '.exe' : ''
}`
const binPath = path.join(__dirname, '..', 'bin')

const executablePath = path.join(binPath, binaryName)

addEnvPaths(binPath)

const sharedPath = path.join(appResourcePath(), 'shared')
// Execute the binary
log(`[CORTEX]:: Spawn cortex at path: ${executablePath}`)
@@ -44,15 +49,17 @@ function run(systemInfo?: SystemInformation): Promise<any> {
`${path.join(dataFolderPath, '.janrc')}`,
'--data_folder_path',
dataFolderPath,
'--loglevel',
'INFO',
],
{
env: {
...process.env,
CUDA_VISIBLE_DEVICES: gpuVisibleDevices,
// Vulkan - Support 1 device at a time for now
...(gpuVisibleDevices?.length > 0 && {
GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices,
}),
// CUDA_VISIBLE_DEVICES: gpuVisibleDevices,
// // Vulkan - Support 1 device at a time for now
// ...(gpuVisibleDevices?.length > 0 && {
//   GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices,
// }),
},
cwd: sharedPath,
}
@@ -71,6 +78,22 @@ function dispose() {
watchdog?.terminate()
}

/**
* Set the environment paths for the cortex subprocess
* @param dest
*/
function addEnvPaths(dest: string) {
// Add engine path to the PATH and LD_LIBRARY_PATH
if (process.platform === 'win32') {
process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
} else {
process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
path.delimiter,
dest
)
}
}

/**
* Cortex process info
*/

@@ -15,8 +15,6 @@ import {
} from '@janhq/core'
import { CortexAPI } from './cortex'
import { scanModelsFolder } from './legacy/model-json'
import { downloadModel } from './legacy/download'
import { systemInformation } from '@janhq/core'
import { deleteModelFiles } from './legacy/delete'

export enum Settings {
@@ -71,18 +69,6 @@ export default class JanModelExtension extends ModelExtension {
* @returns A Promise that resolves when the model is downloaded.
*/
async pullModel(model: string, id?: string, name?: string): Promise<void> {
if (id) {
const model: Model = ModelManager.instance().get(id)
// Clip vision model - should not be handled by cortex.cpp
// TensorRT model - should not be handled by cortex.cpp
if (
model &&
(model.engine === InferenceEngine.nitro_tensorrt_llm ||
model.settings.vision_model)
) {
return downloadModel(model, (await systemInformation()).gpuSetting)
}
}
/**
* Sending POST to /models/pull/{id} endpoint to pull the model
*/

@@ -2,15 +2,12 @@ import {
downloadFile,
DownloadRequest,
fs,
GpuSetting,
InferenceEngine,
joinPath,
Model,
} from '@janhq/core'

export const downloadModel = async (
model: Model,
gpuSettings?: GpuSetting,
network?: { ignoreSSL?: boolean; proxy?: string }
): Promise<void> => {
const homedir = 'file://models'
@@ -27,41 +24,6 @@ export const downloadModel = async (
JSON.stringify(model, null, 2)
)

if (model.engine === InferenceEngine.nitro_tensorrt_llm) {
if (!gpuSettings || gpuSettings.gpus.length === 0) {
console.error('No GPU found. Please check your GPU setting.')
return
}
const firstGpu = gpuSettings.gpus[0]
if (!firstGpu.name.toLowerCase().includes('nvidia')) {
console.error('No Nvidia GPU found. Please check your GPU setting.')
return
}
const gpuArch = firstGpu.arch
if (gpuArch === undefined) {
console.error('No GPU architecture found. Please check your GPU setting.')
return
}

if (!supportedGpuArch.includes(gpuArch)) {
console.debug(
`Your GPU: ${JSON.stringify(firstGpu)} is not supported. Only 30xx, 40xx series are supported.`
)
return
}

const os = 'windows' // TODO: remove this hard coded value

const newSources = model.sources.map((source) => {
const newSource = { ...source }
newSource.url = newSource.url
.replace(/<os>/g, os)
.replace(/<gpuarch>/g, gpuArch)
return newSource
})
model.sources = newSources
}

console.debug(`Download sources: ${JSON.stringify(model.sources)}`)

if (model.sources.length > 1) {

@@ -1,75 +0,0 @@
# Create a Jan Extension using Typescript

Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀

## Create Your Own Extension

To create your own extension, you can use this repository as a template! Just follow the instructions below:

1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository

## Initial Setup

After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.

> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!

1. :hammer_and_wrench: Install the dependencies

   ```bash
   npm install
   ```

1. :building_construction: Package the TypeScript for distribution

   ```bash
   npm run bundle
   ```

1. :white_check_mark: Check your artifact

   There will be a `.tgz` file in your extension directory now.

## Update the Extension Metadata

The [`package.json`](package.json) file defines metadata about your extension, such as
extension name, main entry, description and version.

When you copy this repository, update `package.json` with the name and description for your extension.
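For instance, the fields you would typically touch might look like the sketch below — the scope, names, and values here are placeholders, not a real extension:

```json
{
  "name": "@your-scope/your-extension",
  "productName": "Your Extension",
  "version": "0.1.0",
  "description": "A short description of what your extension does.",
  "main": "dist/index.js"
}
```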

## Update the Extension Code

The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.

There are a few things to keep in mind when writing your extension code:

- Most Jan Extension functions are processed asynchronously.
  In `index.ts`, you will see that the extension function will return a `Promise<any>`.

  ```typescript
  import { events, MessageEvent, MessageRequest } from '@janhq/core'

  function onStart(): Promise<any> {
    return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
      this.inference(data)
    )
  }
  ```

For more information about the Jan Extension Core module, see the
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).

So, what are you waiting for? Go ahead and start customizing your extension!
@@ -1,2 +0,0 @@
@echo off
.\node_modules\.bin\download https://catalog.jan.ai/vulkaninfoSDK.exe -o ./bin
@@ -1,49 +0,0 @@
{
  "name": "@janhq/monitoring-extension",
  "productName": "System Monitoring",
  "version": "1.0.10",
  "description": "Provides system health and OS level data.",
  "main": "dist/index.js",
  "node": "dist/node/index.cjs.js",
  "author": "Jan <service@jan.ai>",
  "license": "AGPL-3.0",
  "scripts": {
    "build": "rolldown -c rolldown.config.mjs && yarn download-artifacts",
    "download-artifacts": "run-script-os && cpx \"bin/**\" \"dist/bin\"",
    "download-artifacts:darwin": "echo 'No artifacts to download for darwin'",
    "download-artifacts:win32": "download.bat",
    "download-artifacts:linux": "download https://catalog.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo",
    "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
  },
  "exports": {
    ".": "./dist/index.js",
    "./main": "./dist/node/index.cjs.js"
  },
  "devDependencies": {
    "@types/node": "^20.11.4",
    "@types/node-os-utils": "^1.3.4",
    "cpx": "^1.5.0",
    "download-cli": "^1.1.1",
    "rimraf": "^3.0.2",
    "rolldown": "1.0.0-beta.1",
    "run-script-os": "^1.1.6",
    "typescript": "^5.3.3"
  },
  "dependencies": {
    "@janhq/core": "../../core/package.tgz",
    "node-os-utils": "^1.3.7"
  },
  "files": [
    "dist/*",
    "package.json",
    "README.md"
  ],
  "bundleDependencies": [
    "node-os-utils",
    "@janhq/core"
  ],
  "installConfig": {
    "hoistingLimits": "workspaces"
  },
  "packageManager": "yarn@4.5.3"
}
@@ -1,22 +0,0 @@
[
  {
    "key": "log-enabled",
    "title": "Enable App Logs",
    "description": "Saves app logs locally on your computer. This enables you to send us crash reports.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "log-cleaning-interval",
    "title": "Log Cleaning Interval",
    "description": "Automatically delete local logs after a certain time interval (in milliseconds).",
    "controllerType": "input",
    "controllerProps": {
      "value": "120000",
      "placeholder": "Interval in milliseconds. E.g. 120000",
      "textAlign": "right"
    }
  }
]
@@ -1,32 +0,0 @@
import { defineConfig } from 'rolldown'
import packageJson from './package.json' with { type: 'json' }
import settingJson from './resources/settings.json' with { type: 'json' }

export default defineConfig([
  {
    input: 'src/index.ts',
    output: {
      format: 'esm',
      file: 'dist/index.js',
    },
    platform: 'browser',
    define: {
      NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
      SETTINGS: JSON.stringify(settingJson),
    },
  },
  {
    input: 'src/node/index.ts',
    external: ['@janhq/core/node'],
    output: {
      format: 'cjs',
      file: 'dist/node/index.cjs.js',
      sourcemap: false,
      inlineDynamicImports: true,
    },
    resolve: {
      extensions: ['.js', '.ts', '.json'],
    },
    platform: 'node',
  },
])
@@ -1,19 +0,0 @@
declare const NODE: string
declare const SETTINGS: SettingComponentProps[]

type CpuGpuInfo = {
  cpu: {
    usage: number
  }
  gpu: GpuInfo[]
}

type GpuInfo = {
  id: string
  name: string
  temperature: string
  utilization: string
  memoryTotal: string
  memoryFree: string
  memoryUtilization: string
}
@@ -1,90 +0,0 @@
import {
  AppConfigurationEventName,
  GpuSetting,
  MonitoringExtension,
  OperatingSystemInfo,
  events,
  executeOnMain,
} from '@janhq/core'

enum Settings {
  logEnabled = 'log-enabled',
  logCleaningInterval = 'log-cleaning-interval',
}
/**
 * JanMonitoringExtension is an extension that provides system monitoring functionality.
 * It implements the MonitoringExtension interface from the @janhq/core package.
 */
export default class JanMonitoringExtension extends MonitoringExtension {
  /**
   * Called when the extension is loaded.
   */
  async onLoad() {
    // Register extension settings
    this.registerSettings(SETTINGS)

    const logEnabled = await this.getSetting<boolean>(Settings.logEnabled, true)
    const logCleaningInterval = parseInt(
      await this.getSetting<string>(Settings.logCleaningInterval, '120000')
    )
    // Register File Logger provided by this extension
    await executeOnMain(NODE, 'registerLogger', {
      logEnabled,
      logCleaningInterval: isNaN(logCleaningInterval)
        ? 120000
        : logCleaningInterval,
    })

    // Attempt to fetch nvidia info
    await executeOnMain(NODE, 'updateNvidiaInfo')
    events.emit(AppConfigurationEventName.OnConfigurationUpdate, {})
  }

  onSettingUpdate<T>(key: string, value: T): void {
    if (key === Settings.logEnabled) {
      executeOnMain(NODE, 'updateLogger', { logEnabled: value })
    } else if (key === Settings.logCleaningInterval) {
      executeOnMain(NODE, 'updateLogger', { logCleaningInterval: value })
    }
  }

  /**
   * Called when the extension is unloaded.
   */
  onUnload(): void {
    // Unregister File Logger provided by this extension
    executeOnMain(NODE, 'unregisterLogger')
  }

  /**
   * Returns the GPU configuration.
   * @returns A Promise that resolves to an object containing the GPU configuration.
   */
  async getGpuSetting(): Promise<GpuSetting | undefined> {
    return executeOnMain(NODE, 'getGpuConfig')
  }

  /**
   * Returns information about the system resources.
   * @returns A Promise that resolves to an object containing information about the system resources.
   */
  getResourcesInfo(): Promise<any> {
    return executeOnMain(NODE, 'getResourcesInfo')
  }

  /**
   * Returns information about the current system load.
   * @returns A Promise that resolves to an object containing information about the current system load.
   */
  getCurrentLoad(): Promise<any> {
    return executeOnMain(NODE, 'getCurrentLoad')
  }

  /**
   * Returns information about the OS
   * @returns
   */
  getOsInfo(): Promise<OperatingSystemInfo> {
    return executeOnMain(NODE, 'getOsInfo')
  }
}
@@ -1,389 +0,0 @@
import {
  GpuSetting,
  GpuSettingInfo,
  LoggerManager,
  OperatingSystemInfo,
  ResourceInfo,
  SupportedPlatforms,
  getJanDataFolderPath,
  log,
} from '@janhq/core/node'
import { mem, cpu } from 'node-os-utils'
import { exec } from 'child_process'
import { writeFileSync, existsSync, readFileSync, mkdirSync } from 'fs'
import path from 'path'
import os from 'os'
import { FileLogger } from './logger'

/**
 * Path to the settings directory
 **/
export const SETTINGS_DIR = path.join(getJanDataFolderPath(), 'settings')
/**
 * Path to the settings file
 **/
export const GPU_INFO_FILE = path.join(SETTINGS_DIR, 'settings.json')

/**
 * Default GPU settings
 * TODO: This needs to be refactored to support multiple accelerators
 **/
const DEFAULT_SETTINGS: GpuSetting = {
  notify: true,
  run_mode: 'cpu',
  nvidia_driver: {
    exist: false,
    version: '',
  },
  cuda: {
    exist: false,
    version: '',
  },
  gpus: [],
  gpu_highest_vram: '',
  gpus_in_use: [],
  is_initial: true,
  // TODO: This needs to be set based on user toggle in settings
  vulkan: false,
}

export const getGpuConfig = async (): Promise<GpuSetting | undefined> => {
  if (process.platform === 'darwin') return undefined
  if (existsSync(GPU_INFO_FILE))
    return JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
  return DEFAULT_SETTINGS
}

export const getResourcesInfo = async (): Promise<ResourceInfo> => {
  const ramUsedInfo = await mem.used()
  const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024
  const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024

  const resourceInfo: ResourceInfo = {
    mem: {
      totalMemory,
      usedMemory,
    },
  }

  return resourceInfo
}

export const getCurrentLoad = () =>
  new Promise<CpuGpuInfo>(async (resolve, reject) => {
    const cpuPercentage = await cpu.usage()
    let data = {
      run_mode: 'cpu',
      gpus_in_use: [],
    }

    if (process.platform !== 'darwin') {
      data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
    }

    if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
      const gpuIds = data.gpus_in_use.join(',')
      if (gpuIds !== '' && data['vulkan'] !== true) {
        exec(
          `nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
          (error, stdout, _) => {
            if (error) {
              console.error(`exec error: ${error}`)
              throw new Error(error.message)
            }
            const gpuInfo: GpuInfo[] = stdout
              .trim()
              .split('\n')
              .map((line) => {
                const [
                  id,
                  name,
                  temperature,
                  utilization,
                  memoryTotal,
                  memoryFree,
                  memoryUtilization,
                ] = line.split(', ').map((item) => item.replace(/\r/g, ''))
                return {
                  id,
                  name,
                  temperature,
                  utilization,
                  memoryTotal,
                  memoryFree,
                  memoryUtilization,
                }
              })

            resolve({
              cpu: { usage: cpuPercentage },
              gpu: gpuInfo,
            })
          }
        )
      } else {
        // Handle the case where gpuIds is empty
        resolve({
          cpu: { usage: cpuPercentage },
          gpu: [],
        })
      }
    } else {
      // Handle the case where run_mode is not 'gpu' or no GPUs are in use
      resolve({
        cpu: { usage: cpuPercentage },
        gpu: [],
      })
    }
  })

/**
 * This will retrieve GPU information and persist settings.json
 * Will be called when the extension is loaded to turn on GPU acceleration if supported
 */
export const updateNvidiaInfo = async () => {
  // ignore if macos
  if (process.platform === 'darwin') return

  try {
    JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
  } catch (error) {
    if (!existsSync(SETTINGS_DIR)) {
      mkdirSync(SETTINGS_DIR, {
        recursive: true,
      })
    }
    writeFileSync(GPU_INFO_FILE, JSON.stringify(DEFAULT_SETTINGS, null, 2))
  }

  await updateNvidiaDriverInfo()
  await updateGpuInfo()
}

const updateNvidiaDriverInfo = async () =>
  new Promise((resolve, reject) => {
    exec(
      'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
      (error, stdout) => {
        const data: GpuSetting = JSON.parse(
          readFileSync(GPU_INFO_FILE, 'utf-8')
        )

        if (!error) {
          const firstLine = stdout.split('\n')[0].trim()
          data.nvidia_driver.exist = true
          data.nvidia_driver.version = firstLine
        } else {
          data.nvidia_driver.exist = false
        }

        writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
        resolve({})
      }
    )
  })

const getGpuArch = (gpuName: string): string => {
  if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown'

  if (gpuName.includes('30')) return 'ampere'
  else if (gpuName.includes('40')) return 'ada'
  else return 'unknown'
}

const updateGpuInfo = async () =>
  new Promise((resolve, reject) => {
    let data: GpuSetting = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))

    // Cuda
    if (data.vulkan === true) {
      // Vulkan
      exec(
        process.platform === 'win32'
          ? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
          : `${__dirname}/../bin/vulkaninfo --summary`,
        async (error, stdout) => {
          if (!error) {
            const output = stdout.toString()

            log(output)
            const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g

            const gpus: GpuSettingInfo[] = []
            let match
            while ((match = gpuRegex.exec(output)) !== null) {
              const id = match[1]
              const name = match[2]
              const arch = getGpuArch(name)
              gpus.push({ id, vram: '0', name, arch })
            }
            data.gpus = gpus

            if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
              data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
            }

            data = await updateCudaExistence(data)
            writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
            log(`[APP]::${JSON.stringify(data)}`)
            resolve({})
          } else {
            reject(error)
          }
        }
      )
    } else {
      exec(
        'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
        async (error, stdout) => {
          if (!error) {
            log(`[SPECS]::${stdout}`)
            // Get GPU info and track the GPU with the highest memory
            let highestVram = 0
            let highestVramId = '0'
            const gpus: GpuSettingInfo[] = stdout
              .trim()
              .split('\n')
              .map((line) => {
                let [id, vram, name] = line.split(', ')
                const arch = getGpuArch(name)
                vram = vram.replace(/\r/g, '')
                if (parseFloat(vram) > highestVram) {
                  highestVram = parseFloat(vram)
                  highestVramId = id
                }
                return { id, vram, name, arch }
              })

            data.gpus = gpus
            data.gpu_highest_vram = highestVramId
          } else {
            data.gpus = []
            data.gpu_highest_vram = undefined
          }

          if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
            data.gpus_in_use = data.gpu_highest_vram ? [data.gpu_highest_vram].filter(e => !!e) : []
          }

          data = await updateCudaExistence(data)
          console.log('[MONITORING]::Cuda info: ', data)
          writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
          log(`[APP]::${JSON.stringify(data)}`)
          resolve({})
        }
      )
    }
  })

/**
 * Check if file exists in paths
 */
const checkFileExistenceInPaths = (file: string, paths: string[]): boolean => {
  return paths.some((p) => existsSync(path.join(p, file)))
}

/**
 * Validate cuda for linux and windows
 */
const updateCudaExistence = async (
  data: GpuSetting = DEFAULT_SETTINGS
): Promise<GpuSetting> => {
  let filesCuda12: string[]
  let filesCuda11: string[]
  let paths: string[]
  let cudaVersion: string = ''

  if (process.platform === 'win32') {
    filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']
    filesCuda11 = ['cublas64_11.dll', 'cudart64_110.dll', 'cublasLt64_11.dll']
    paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []
  } else {
    filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']
    filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']
    paths = process.env.LD_LIBRARY_PATH
      ? process.env.LD_LIBRARY_PATH.split(path.delimiter)
      : []
    paths.push('/usr/lib/x86_64-linux-gnu/')
  }

  let cudaExists = filesCuda12.every(
    (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
  )

  if (!cudaExists) {
    cudaExists = filesCuda11.every(
      (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
    )
    if (cudaExists) {
      cudaVersion = '11'
    }
  } else {
    cudaVersion = '12'
  }

  data.cuda.exist = cudaExists
  data.cuda.version = cudaVersion

  console.debug(data.is_initial, data.gpus_in_use)

  if (cudaExists && data.is_initial && data.gpus_in_use.length > 0) {
    data.run_mode = 'gpu'
  }

  data.is_initial = false

  // Attempt to query CUDA using NVIDIA SMI
  if (!cudaExists) {
    await new Promise<void>((resolve) => {
      exec('nvidia-smi', (error, stdout) => {
        if (!error) {
          const regex = /CUDA\s*Version:\s*(\d+\.\d+)/g
          const match = regex.exec(stdout)
          if (match && match[1]) {
            data.cuda.version = match[1]
          }
        }
        console.log('[MONITORING]::Finalized cuda info update: ', data)
        resolve()
      })
    })
  }
  return data
}

export const getOsInfo = (): OperatingSystemInfo => {
  const platform =
    SupportedPlatforms.find((p) => p === process.platform) || 'unknown'

  const osInfo: OperatingSystemInfo = {
    platform: platform,
    arch: process.arch,
    release: os.release(),
    machine: os.machine(),
    version: os.version(),
    totalMem: os.totalmem(),
    freeMem: os.freemem(),
  }

  return osInfo
}

export const registerLogger = ({ logEnabled, logCleaningInterval }) => {
  const logger = new FileLogger(logEnabled, logCleaningInterval)
  LoggerManager.instance().register(logger)
  logger.cleanLogs()
}

export const unregisterLogger = () => {
  LoggerManager.instance().unregister('file')
}

export const updateLogger = ({ logEnabled, logCleaningInterval }) => {
  const logger = LoggerManager.instance().loggers.get('file') as FileLogger
  if (logger && logEnabled !== undefined) logger.logEnabled = logEnabled
  if (logger && logCleaningInterval)
    logger.logCleaningInterval = logCleaningInterval
  // Rerun
  logger && logger.cleanLogs()
}
@ -5,77 +5,470 @@
|
||||
"post": {
|
||||
"operationId": "AssistantsController_create",
|
||||
"summary": "Create assistant",
|
||||
"description": "Creates a new assistant.",
|
||||
"parameters": [],
|
||||
"description": "Creates a new assistant with the specified configuration.",
|
||||
"requestBody": {
|
||||
"required": true,
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/CreateAssistantDto"
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The model identifier to use for the assistant."
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The name of the assistant."
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "The description of the assistant."
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string",
|
||||
"description": "Instructions for the assistant's behavior."
|
||||
},
|
||||
"tools": {
|
||||
"type": "array",
|
||||
"description": "A list of tools enabled on the assistant. Maximum of 128 tools.",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"code_interpreter",
|
||||
"file_search",
|
||||
"function"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"responses": {
|
||||
"201": {
|
||||
"description": "The assistant has been successfully created."
|
||||
"tool_resources": {
|
||||
"type": "object",
|
||||
"description": "Resources used by the assistant's tools.",
|
||||
"properties": {
|
||||
"code_interpreter": {
|
||||
"type": "object"
|
||||
},
|
||||
"file_search": {
|
||||
"type": "object"
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": ["Assistants"]
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"description": "Set of key-value pairs for the assistant.",
|
||||
"additionalProperties": true
|
||||
},
|
||||
"get": {
|
||||
"operationId": "AssistantsController_findAll",
|
||||
"summary": "List assistants",
|
||||
"description": "Returns a list of assistants.",
|
||||
"parameters": [
|
||||
"temperature": {
|
||||
"type": "number",
|
||||
"format": "float",
|
||||
"description": "Temperature parameter for response generation."
|
||||
},
|
||||
"top_p": {
|
||||
"type": "number",
|
||||
"format": "float",
|
||||
"description": "Top p parameter for response generation."
|
||||
},
|
||||
"response_format": {
|
||||
"oneOf": [
|
||||
{
|
||||
"name": "limit",
|
||||
"required": false,
|
||||
"in": "query",
|
||||
"description": "A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.",
|
||||
"schema": {
|
||||
"type": "number"
|
||||
}
|
||||
"type": "string",
|
||||
"enum": ["auto"]
|
||||
},
|
||||
{
|
||||
"name": "order",
|
||||
"required": false,
|
||||
"in": "query",
|
||||
"description": "Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order.",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "after",
|
||||
"required": false,
|
||||
"in": "query",
|
||||
"description": "A cursor for use in pagination. after is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
"required": ["model"]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "before",
|
||||
"required": false,
|
||||
"in": "query",
|
||||
"description": "A cursor for use in pagination. before is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list.",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Ok",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The unique identifier of the assistant."
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"enum": ["assistant"],
|
||||
"description": "The object type, which is always 'assistant'."
|
||||
},
|
||||
"created_at": {
|
||||
"type": "integer",
|
||||
"description": "Unix timestamp (in seconds) of when the assistant was created."
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The model identifier used by the assistant."
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The name of the assistant."
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "The description of the assistant."
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string",
|
||||
"description": "Instructions for the assistant's behavior."
|
||||
},
|
||||
"tools": {
|
||||
"type": "array",
|
||||
"description": "A list of tools enabled on the assistant.",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"code_interpreter",
|
||||
"file_search",
|
||||
"function"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tool_resources": {
|
||||
"type": "object",
|
||||
"description": "Resources used by the assistant's tools.",
|
||||
"properties": {
|
||||
"code_interpreter": {
|
||||
"type": "object"
|
||||
},
|
||||
"file_search": {
|
||||
"type": "object"
|
||||
}
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"description": "Set of key-value pairs that can be attached to the assistant.",
|
||||
"additionalProperties": true
|
||||
},
|
||||
"temperature": {
|
||||
"type": "number",
|
||||
"format": "float",
|
||||
"description": "Temperature parameter for response generation."
|
||||
},
|
||||
"top_p": {
|
||||
"type": "number",
|
||||
"format": "float",
|
||||
"description": "Top p parameter for response generation."
|
||||
},
|
||||
"response_format": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"enum": ["auto"]
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"id",
|
||||
"object",
|
||||
"created_at",
|
||||
"model",
|
||||
"metadata"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": ["Assistants"]
|
||||
},
|
||||
"patch": {
|
||||
"operationId": "AssistantsController_update",
|
||||
"summary": "Update assistant",
|
||||
"description": "Updates an assistant. Requires at least one modifiable field.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "id",
|
||||
"required": true,
|
||||
"in": "path",
|
||||
"description": "The unique identifier of the assistant.",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "OpenAI-Beta",
|
||||
"required": true,
|
||||
"in": "header",
|
||||
"description": "Beta feature header.",
|
||||
"schema": {
|
||||
"type": "string",
|
||||
"enum": ["assistants=v2"]
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"required": true,
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The model identifier to use for the assistant."
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The name of the assistant."
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "The description of the assistant."
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string",
|
||||
"description": "Instructions for the assistant's behavior."
|
||||
},
|
||||
"tools": {
|
||||
"type": "array",
|
||||
"description": "A list of tools enabled on the assistant. Maximum of 128 tools.",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"code_interpreter",
|
||||
"file_search",
|
||||
"function"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tool_resources": {
|
||||
"type": "object",
|
||||
"description": "Resources used by the assistant's tools.",
|
||||
"properties": {
|
||||
"code_interpreter": {
|
||||
"type": "object"
|
||||
},
|
||||
"file_search": {
|
||||
"type": "object"
|
||||
}
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"description": "Set of key-value pairs for the assistant.",
|
||||
"additionalProperties": true
|
||||
},
|
||||
"temperature": {
|
||||
"type": "number",
|
||||
"format": "float",
|
||||
"description": "Temperature parameter for response generation."
|
||||
},
|
||||
"top_p": {
|
||||
"type": "number",
|
||||
"format": "float",
|
||||
"description": "Top p parameter for response generation."
|
||||
},
|
||||
"response_format": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"enum": ["auto"]
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"minProperties": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Ok",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The unique identifier of the assistant."
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"enum": ["assistant"],
|
||||
"description": "The object type, which is always 'assistant'."
|
||||
},
|
||||
"created_at": {
|
||||
"type": "integer",
|
||||
"description": "Unix timestamp (in seconds) of when the assistant was created."
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The model identifier used by the assistant."
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The name of the assistant."
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "The description of the assistant."
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string",
|
||||
"description": "Instructions for the assistant's behavior."
|
||||
},
|
||||
"tools": {
|
||||
"type": "array",
|
||||
"description": "A list of tools enabled on the assistant.",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"code_interpreter",
|
||||
"file_search",
|
||||
"function"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tool_resources": {
|
||||
"type": "object",
|
||||
"description": "Resources used by the assistant's tools.",
|
||||
"properties": {
|
||||
"code_interpreter": {
|
||||
"type": "object"
|
||||
},
|
||||
"file_search": {
|
||||
"type": "object"
|
||||
}
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"description": "Set of key-value pairs that can be attached to the assistant.",
|
||||
"additionalProperties": true
|
||||
},
|
||||
"temperature": {
|
||||
"type": "number",
|
||||
"format": "float",
|
||||
"description": "Temperature parameter for response generation."
|
||||
},
|
||||
"top_p": {
|
||||
"type": "number",
|
||||
"format": "float",
|
||||
"description": "Top p parameter for response generation."
|
||||
},
|
||||
"response_format": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"enum": ["auto"]
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"id",
|
||||
"object",
|
||||
"created_at",
|
||||
"model",
|
||||
"metadata"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": ["Assistants"]
|
||||
},
|
||||
"get": {
|
||||
"operationId": "AssistantsController_list",
|
||||
"summary": "List assistants",
|
||||
"description": "Returns a list of assistants.",
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Ok",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"object": {
|
||||
"type": "string",
|
||||
"enum": ["list"],
|
||||
"description": "The object type, which is always 'list' for a list response."
|
||||
},
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/AssistantEntity"
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The unique identifier of the assistant."
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"enum": ["assistant"],
|
||||
"description": "The object type, which is always 'assistant'."
|
||||
},
|
||||
"created_at": {
|
||||
"type": "integer",
|
||||
"description": "Unix timestamp (in seconds) of when the assistant was created."
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The model identifier used by the assistant."
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"description": "Set of key-value pairs that can be attached to the assistant.",
|
||||
"additionalProperties": true
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"id",
|
||||
"object",
|
||||
"created_at",
|
||||
"model",
|
||||
"metadata"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["object", "data"]
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -88,7 +481,77 @@
|
||||
"get": {
|
||||
"operationId": "AssistantsController_findOne",
|
||||
"summary": "Get assistant",
|
||||
"description": "Retrieves a specific assistant defined by an assistant's `id`.",
|
||||
"description": "Retrieves a specific assistant by ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "id",
|
||||
"required": true,
|
||||
"in": "path",
|
||||
"description": "The unique identifier of the assistant.",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "OpenAI-Beta",
|
||||
"required": true,
|
||||
"in": "header",
|
||||
"description": "Beta feature header.",
|
||||
"schema": {
|
||||
"type": "string",
|
||||
"enum": ["assistants=v2"]
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Ok",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The unique identifier of the assistant."
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"enum": ["assistant"],
|
||||
"description": "The object type, which is always 'assistant'."
|
||||
},
|
||||
"created_at": {
|
||||
"type": "integer",
|
||||
"description": "Unix timestamp (in seconds) of when the assistant was created."
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The model identifier used by the assistant."
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"description": "Set of key-value pairs attached to the assistant.",
|
||||
"additionalProperties": true
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"id",
|
||||
"object",
|
||||
"created_at",
|
||||
"model",
|
||||
"metadata"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": ["Assistants"]
|
||||
},
|
||||
"delete": {
|
||||
"operationId": "AssistantsController_remove",
|
||||
"summary": "Delete assistant",
|
||||
"description": "Deletes a specific assistant by ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "id",
|
||||
@ -106,36 +569,24 @@
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/AssistantEntity"
|
||||
}
|
||||
}
|
||||
}
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The unique identifier of the deleted assistant."
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"enum": ["assistant.deleted"],
|
||||
"description": "The object type for a deleted assistant."
|
||||
},
|
||||
"deleted": {
|
||||
"type": "boolean",
|
||||
"enum": [true],
|
||||
"description": "Indicates the assistant was successfully deleted."
|
||||
}
|
||||
},
|
||||
"tags": ["Assistants"]
|
||||
},
|
||||
"delete": {
|
||||
"operationId": "AssistantsController_remove",
|
||||
"summary": "Delete assistant",
|
||||
"description": "Deletes a specific assistant defined by an assistant's `id`.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "id",
|
||||
"required": true,
|
||||
"in": "path",
|
||||
"description": "The unique identifier of the assistant.",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "The assistant has been successfully deleted.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/DeleteAssistantResponseDto"
|
||||
"required": ["id", "object", "deleted"]
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2199,6 +2650,84 @@
|
||||
"tags": ["Engines"]
|
||||
}
|
||||
},
|
||||
"/engines/{name}/releases/{version}": {
|
||||
"get": {
|
||||
"summary": "List variants for a specific engine version",
|
||||
"description": "Lists all available variants (builds) for a specific version of an engine. Variants can include different CPU architectures (AVX, AVX2, AVX512), GPU support (CUDA, Vulkan), and operating systems (Windows, Linux, macOS).",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "name",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string",
|
||||
"enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"],
|
||||
"default": "llama-cpp"
|
||||
},
|
||||
"description": "The type of engine"
|
||||
},
|
||||
{
|
||||
"name": "version",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "The version of the engine"
|
||||
},
|
||||
{
|
||||
"name": "show",
|
||||
"in": "query",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string",
|
||||
"enum": ["all", "compatible"],
|
||||
"default": "all"
|
||||
},
|
||||
"description": "Filter the variants list. Use 'compatible' to show only variants compatible with the current system, or 'all' to show all available variants."
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Successfully retrieved variants list",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The name of the variant, including OS, architecture, and capabilities",
|
||||
"example": "linux-amd64-avx-cuda-11-7"
|
||||
},
|
||||
"created_at": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "Creation timestamp of the variant",
|
||||
"example": "2024-11-13T04:51:16Z"
|
||||
},
|
||||
"size": {
|
||||
"type": "integer",
|
||||
"description": "Size of the variant in bytes",
|
||||
"example": 151224604
|
||||
},
|
||||
"download_count": {
|
||||
"type": "integer",
|
||||
"description": "Number of times this variant has been downloaded",
|
||||
"example": 0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": ["Engines"]
|
||||
}
|
||||
},
|
||||
"/engines/{name}/releases/latest": {
|
||||
"get": {
|
||||
"summary": "Get latest release",
|
||||
@ -2314,7 +2843,7 @@
|
||||
"get_models_url": {
|
||||
"type": "string",
|
||||
"description": "The URL to get models",
|
||||
"example": "https://api.openai.com/v1/models"
|
||||
"example": "https://api.openai.com/models"
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3378,6 +3907,7 @@
|
||||
"Files",
|
||||
"Hardware",
|
||||
"Events",
|
||||
"Assistants",
|
||||
"Threads",
|
||||
"Messages",
|
||||
"Pulling Models",
|
||||
@ -4858,8 +5388,8 @@
|
||||
"engine",
|
||||
"version",
|
||||
"inference_params",
|
||||
"TransformReq",
|
||||
"TransformResp",
|
||||
"transform_req",
|
||||
"transform_resp",
|
||||
"metadata"
|
||||
],
|
||||
"properties": {
|
||||
@ -4867,9 +5397,9 @@
|
||||
"type": "string",
|
||||
"description": "The identifier of the model."
|
||||
},
|
||||
"api_key_template": {
|
||||
"header_template": {
|
||||
"type": "string",
|
||||
"description": "Template for the API key header."
|
||||
"description": "Template for the header."
|
||||
},
|
||||
"engine": {
|
||||
"type": "string",
|
||||
@ -4902,7 +5432,7 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"TransformReq": {
|
||||
"transform_req": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"get_models": {
|
||||
@ -4924,7 +5454,7 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"TransformResp": {
|
||||
"transform_resp": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"chat_completions": {
|
||||
@ -5632,9 +6162,9 @@
|
||||
"description": "Number of GPU layers.",
|
||||
"example": 33
|
||||
},
|
||||
"api_key_template": {
|
||||
"header_template": {
|
||||
"type": "string",
|
||||
"description": "Template for the API key header."
|
||||
"description": "Template for the header."
|
||||
},
|
||||
"version": {
|
||||
"type": "string",
|
||||
|
||||
@ -10,7 +10,9 @@ const AutoLink = ({ text }: Props) => {
|
||||
|
||||
return (
|
||||
<>
|
||||
{text.split(delimiter).map((word) => {
|
||||
{text &&
|
||||
typeof text === 'string' &&
|
||||
text.split(delimiter).map((word) => {
|
||||
const match = word.match(delimiter)
|
||||
if (match) {
|
||||
const url = match[0]
|
||||
|
||||
@ -23,7 +23,13 @@ import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
|
||||
import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom'
|
||||
import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom'
|
||||
|
||||
const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
|
||||
const ErrorMessage = ({
|
||||
message,
|
||||
errorComponent,
|
||||
}: {
|
||||
message?: ThreadMessage
|
||||
errorComponent?: React.ReactNode
|
||||
}) => {
|
||||
const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom)
|
||||
const setMainState = useSetAtom(mainViewStateAtom)
|
||||
const setSelectedSettingScreen = useSetAtom(selectedSettingAtom)
|
||||
@ -50,7 +56,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
|
||||
const getErrorTitle = () => {
|
||||
const engine = getEngine()
|
||||
|
||||
switch (message.metadata?.error_code) {
|
||||
switch (message?.metadata?.error_code) {
|
||||
case ErrorCode.InvalidApiKey:
|
||||
case ErrorCode.AuthenticationError:
|
||||
return (
|
||||
@ -61,7 +67,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
|
||||
className="font-medium text-[hsla(var(--app-link))] underline"
|
||||
onClick={() => {
|
||||
setMainState(MainViewState.Settings)
|
||||
engine?.name && setSelectedSettingScreen(engine.name)
|
||||
setSelectedSettingScreen(activeAssistant?.model?.engine ?? '')
|
||||
}}
|
||||
>
|
||||
Settings
|
||||
@ -77,7 +83,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
|
||||
data-testid="passthrough-error-message"
|
||||
className="first-letter:uppercase"
|
||||
>
|
||||
{message.content[0]?.text?.value === 'Failed to fetch' &&
|
||||
{message?.content[0]?.text?.value === 'Failed to fetch' &&
|
||||
engine &&
|
||||
engine?.name !== InferenceEngine.cortex_llamacpp ? (
|
||||
<span>
|
||||
@ -89,6 +95,9 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
|
||||
{message?.content[0]?.text?.value && (
|
||||
<AutoLink text={message?.content[0]?.text?.value} />
|
||||
)}
|
||||
{!message?.content[0]?.text?.value && (
|
||||
<span>Something went wrong. Please try again.</span>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
</p>
|
||||
@ -100,12 +109,15 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
|
||||
<div className="mx-auto my-6 max-w-[700px] px-4">
|
||||
<div
|
||||
className="mx-auto max-w-[400px] rounded-lg border border-[hsla(var(--app-border))]"
|
||||
key={message.id}
|
||||
key={message?.id}
|
||||
>
|
||||
<div className="flex justify-between border-b border-inherit px-4 py-2">
|
||||
<h6 className="text-[hsla(var(--destructive-bg))]">Error</h6>
|
||||
<div className="flex gap-x-4 text-xs">
|
||||
<div>
|
||||
<h6 className="flex items-center gap-x-1 font-semibold text-[hsla(var(--destructive-bg))]">
|
||||
<span className="h-2 w-2 rounded-full bg-[hsla(var(--destructive-bg))]" />
|
||||
<span>Error</span>
|
||||
</h6>
|
||||
<div className="flex items-center gap-x-4 text-xs">
|
||||
<div className="font-semibold">
|
||||
<span
|
||||
className="flex cursor-pointer items-center gap-x-1 text-[hsla(var(--app-link))]"
|
||||
onClick={() => setModalTroubleShooting(true)}
|
||||
@ -116,7 +128,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
|
||||
<ModalTroubleShooting />
|
||||
</div>
|
||||
<div
|
||||
className="flex cursor-pointer items-center gap-x-1 text-[hsla(var(--text-secondary))]"
|
||||
className="flex cursor-pointer items-center gap-x-1 font-semibold text-[hsla(var(--text-secondary))]"
|
||||
onClick={handleCopy}
|
||||
>
|
||||
{copied ? (
|
||||
@ -138,10 +150,10 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
|
||||
</div>
|
||||
<div className="max-h-[80px] w-full overflow-x-auto p-4 py-2">
|
||||
<div
|
||||
className="text-xs leading-relaxed text-[hsla(var(--text-secondary))]"
|
||||
className="font-serif text-xs leading-relaxed text-[hsla(var(--text-secondary))]"
|
||||
ref={errorDivRef}
|
||||
>
|
||||
{getErrorTitle()}
|
||||
{errorComponent ? errorComponent : getErrorTitle()}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -87,7 +87,7 @@ describe('SystemMonitor', () => {
|
||||
|
||||
expect(screen.getByText('Running Models')).toBeInTheDocument()
|
||||
expect(screen.getByText('App Log')).toBeInTheDocument()
|
||||
expect(screen.getByText('7.45/14.90 GB')).toBeInTheDocument()
|
||||
expect(screen.getByText('7.45GB / 14.90GB')).toBeInTheDocument()
|
||||
expect(screen.getByText('30%')).toBeInTheDocument()
|
||||
})
|
||||
|
||||
|
||||
@ -134,7 +134,7 @@ const SystemMonitor = () => {
|
||||
<div className="flex items-center justify-between gap-2">
|
||||
<h6 className="font-bold">Memory</h6>
|
||||
<span>
|
||||
{toGigabytes(usedRam, { hideUnit: true })}/
|
||||
{toGigabytes(usedRam, { hideUnit: true })}GB /{' '}
|
||||
{toGigabytes(totalRam, { hideUnit: true })}GB
|
||||
</span>
|
||||
</div>
|
||||
@ -149,10 +149,12 @@ const SystemMonitor = () => {
|
||||
</div>
|
||||
{gpus.length > 0 && (
|
||||
<div className="mb-4 border-b border-[hsla(var(--app-border))] pb-4 last:border-none">
|
||||
{gpus.map((gpu, index) => {
|
||||
{gpus
|
||||
.filter((gpu) => gpu.activated === true)
|
||||
.map((gpu, index) => {
|
||||
const gpuUtilization = utilizedMemory(
|
||||
gpu.memoryFree,
|
||||
gpu.memoryTotal
|
||||
gpu.free_vram,
|
||||
gpu.total_vram
|
||||
)
|
||||
return (
|
||||
<div key={index} className="mt-4 flex flex-col gap-x-2">
|
||||
@ -163,8 +165,8 @@ const SystemMonitor = () => {
|
||||
<div className="flex gap-x-2">
|
||||
<div className="">
|
||||
<span>
|
||||
{gpu.memoryTotal - gpu.memoryFree}/
|
||||
{gpu.memoryTotal}
|
||||
{gpu.total_vram - gpu.free_vram}/
|
||||
{gpu.total_vram}
|
||||
</span>
|
||||
<span> MB</span>
|
||||
</div>
|
||||
|
||||
@@ -25,6 +25,8 @@ import ImportModelOptionModal from '@/screens/Settings/ImportModelOptionModal'
 import ImportingModelModal from '@/screens/Settings/ImportingModelModal'
 import SelectingModelModal from '@/screens/Settings/SelectingModelModal'

+import { getAppDistinctId, updateDistinctId } from '@/utils/settings'
+
 import LoadingModal from '../LoadingModal'

 import MainViewContainer from '../MainViewContainer'
@@ -96,8 +98,16 @@ const BaseLayout = () => {
         return properties
       },
     })
+    // Attempt to restore distinct Id from app global settings
+    getAppDistinctId()
+      .then((id) => {
+        if (id) posthog.identify(id)
+      })
+      .finally(() => {
         posthog.opt_in_capturing()
         posthog.register({ app_version: VERSION })
+        updateDistinctId(posthog.get_distinct_id())
+      })
   } else {
     posthog.opt_out_capturing()
   }
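The new block restores a previously persisted PostHog distinct id before opting in, then writes the current id back, so the analytics identity survives reinstalls or cleared local storage. The helpers come from '@/utils/settings' and their implementation is not part of this diff; the stand-ins below are only a sketch of the contract the calling code assumes:

// Hypothetical persistence backing getAppDistinctId/updateDistinctId.
// The real helpers presumably read and write Jan's global settings store.
const settingsStore = new Map<string, string>()

export async function getAppDistinctId(): Promise<string | undefined> {
  return settingsStore.get('distinct_id')
}

export async function updateDistinctId(distinctId: string): Promise<void> {
  settingsStore.set('distinct_id', distinctId)
}

Running the opt-in inside `.finally()` means capturing is enabled and the id re-persisted whether or not a stored id was found.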
@@ -28,6 +28,8 @@ import ModelLabel from '@/containers/ModelLabel'

 import SetupRemoteModel from '@/containers/SetupRemoteModel'

+import { useActiveModel } from '@/hooks/useActiveModel'
+
 import { useCreateNewThread } from '@/hooks/useCreateNewThread'
 import useDownloadModel from '@/hooks/useDownloadModel'
 import { modelDownloadStateAtom } from '@/hooks/useDownloadState'
@@ -40,7 +42,7 @@ import useUpdateModelParameters from '@/hooks/useUpdateModelParameters'
 import { formatDownloadPercentage, toGigabytes } from '@/utils/converter'

 import { manualRecommendationModel } from '@/utils/model'
-import { getLogoEngine } from '@/utils/modelEngine'
+import { getLogoEngine, getTitleByEngine } from '@/utils/modelEngine'

 import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom'
 import {
@@ -93,6 +95,7 @@ const ModelDropdown = ({
   const { updateModelParameter } = useUpdateModelParameters()
   const searchInputRef = useRef<HTMLInputElement>(null)
   const configuredModels = useAtomValue(configuredModelsAtom)
+  const { stopModel } = useActiveModel()

   const featuredModels = configuredModels.filter(
     (x) =>
@@ -226,6 +229,7 @@ const ModelDropdown = ({
       const model = downloadedModels.find((m) => m.id === modelId)
       setSelectedModel(model)
       setOpen(false)
+      stopModel()

       if (activeThread) {
         // Change assistand tools based on model support RAG
@@ -248,18 +252,13 @@ const ModelDropdown = ({
           ],
         })

-      const defaultContextLength = Math.min(
-        8192,
-        model?.settings.ctx_len ?? 8192
-      )
-
+      const contextLength = model?.settings.ctx_len
+        ? Math.min(8192, model?.settings.ctx_len ?? 8192)
+        : undefined
       const overriddenParameters = {
-        ctx_len: model?.settings.ctx_len ? defaultContextLength : undefined,
-        max_tokens: defaultContextLength
-          ? Math.min(
-              model?.parameters.max_tokens ?? 8192,
-              defaultContextLength
-            )
+        ctx_len: contextLength,
+        max_tokens: contextLength
+          ? Math.min(model?.parameters.max_tokens ?? 8192, contextLength)
           : model?.parameters.max_tokens,
       }
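The refactor replaces `defaultContextLength` (always defined) with `contextLength`, which stays `undefined` when the model declares no `ctx_len`, so models without a declared context no longer receive a spurious 8192 cap. A self-contained sketch of the clamping, with illustrative values (the helper name below is not from the source):

// Mirrors the overriddenParameters logic above.
const clampParameters = (ctxLen?: number, maxTokens?: number) => {
  const contextLength = ctxLen ? Math.min(8192, ctxLen) : undefined
  return {
    ctx_len: contextLength,
    max_tokens: contextLength
      ? Math.min(maxTokens ?? 8192, contextLength)
      : maxTokens,
  }
}

console.log(clampParameters(32768, 16384)) // { ctx_len: 8192, max_tokens: 8192 }
console.log(clampParameters(4096, 16384)) // { ctx_len: 4096, max_tokens: 4096 }
console.log(clampParameters(undefined, 4096)) // { ctx_len: undefined, max_tokens: 4096 }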
@@ -289,6 +288,7 @@ const ModelDropdown = ({
       updateThreadMetadata,
       setThreadModelParams,
       updateModelParameter,
+      stopModel,
     ]
   )

@@ -429,7 +429,7 @@ const ModelDropdown = ({
           />
         )}
         <h6 className="font-medium capitalize text-[hsla(var(--text-secondary))]">
-          {engine.name}
+          {getTitleByEngine(engine.name)}
         </h6>
       </div>
       <div className="-mr-2 flex gap-1">
@@ -475,7 +475,7 @@ const ModelDropdown = ({
       >
         <div className="flex items-center gap-2">
           <p
-            className="line-clamp-1 text-[hsla(var(--text-secondary))]"
+            className="max-w-[200px] overflow-hidden truncate whitespace-nowrap text-[hsla(var(--text-secondary))]"
             title={model.name}
           >
             {model.name}
@@ -549,6 +549,8 @@ const ModelDropdown = ({
           (c) => c.id === model.id
         )
         return (
+          <>
+            {isDownloaded && (
               <li
                 key={model.id}
                 className={twMerge(
@@ -558,7 +560,10 @@ const ModelDropdown = ({
                     : 'text-[hsla(var(--text-primary))]'
                 )}
                 onClick={() => {
-                  if (!isConfigured && engine.type === 'remote')
+                  if (
+                    !isConfigured &&
+                    engine.type === 'remote'
+                  )
                     return null
                   if (isDownloaded) {
                     onClickModelItem(model.id)
@@ -568,7 +573,7 @@ const ModelDropdown = ({
                 <div className="flex gap-x-2">
                   <p
                     className={twMerge(
-                      'line-clamp-1',
+                      'max-w-[200px] overflow-hidden truncate whitespace-nowrap',
                       !isDownloaded &&
                         'text-[hsla(var(--text-secondary))]'
                     )}
@@ -618,6 +623,8 @@ const ModelDropdown = ({
                   )}
                 </div>
               </li>
+            )}
+          </>
         )
       })}
     </ul>
@@ -29,15 +29,20 @@ const ModelLabel = ({ size, compact }: Props) => {
   const { settings } = useSettings()

   const getLabel = (size: number) => {
-    const minimumRamModel = size * 1.25
-    const availableRam =
-      settings?.run_mode === 'gpu'
+    const minimumRamModel = (size * 1.25) / (1024 * 1024)
+
+    const availableRam = settings?.gpus?.some((gpu) => gpu.activated)
       ? availableVram * 1000000 // MB to bytes
-      : totalRam - usedRam + (activeModel?.metadata?.size ?? 0)
+      : totalRam -
+        (usedRam +
+          (activeModel?.metadata?.size
+            ? (activeModel.metadata.size * 1.25) / (1024 * 1024)
+            : 0))

     if (minimumRamModel > totalRam) {
       return (
         <NotEnoughMemoryLabel
-          unit={settings?.run_mode === 'gpu' ? 'VRAM' : 'RAM'}
+          unit={settings?.gpus?.some((gpu) => gpu.activated) ? 'VRAM' : 'RAM'}
           compact={compact}
         />
       )
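The new formula converts the model's size (apparently bytes) to MB with a 1.25x head-room factor before comparing against available memory, and GPU mode is now detected from activated GPUs rather than `run_mode`. A sketch of the fit check under those unit assumptions (inferred from the `(size * 1.25) / (1024 * 1024)` conversion above, not confirmed elsewhere):

const BYTES_PER_MB = 1024 * 1024

// Assumed units: model size in bytes, RAM figures in MB.
function minimumRamMb(modelSizeBytes: number): number {
  return (modelSizeBytes * 1.25) / BYTES_PER_MB
}

function fitsInMemory(modelSizeBytes: number, totalRamMb: number): boolean {
  return minimumRamMb(modelSizeBytes) <= totalRamMb
}

// A 4 GiB model needs 5120 MB of head-room:
console.log(minimumRamMb(4 * 1024 ** 3)) // 5120
console.log(fitsInMemory(4 * 1024 ** 3, 16384)) // true
console.log(fitsInMemory(4 * 1024 ** 3, 4096)) // false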
@@ -143,8 +143,7 @@ export default function ModelHandler() {
       return
     }

     // The thread title should not be updated if the message is less than 10 words
-    // And no new line character is present
-    // And non-alphanumeric characters should be removed
+    // No new line character is presented in the title
     if (messageContent.includes('\n')) {
       messageContent = messageContent.replace(/\n/g, ' ')
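The comment block above states the thread-title rules the handler enforces inline. A hypothetical helper condensing those rules (names and the exact word-splitting are illustrative, not taken from the source):

// Returns undefined when the message is too short to justify a retitle.
function deriveThreadTitle(messageContent: string): string | undefined {
  const words = messageContent.trim().split(/\s+/)
  if (words.length < 10) return undefined // fewer than 10 words: keep the old title
  return messageContent
    .replace(/\n/g, ' ') // no new line characters in the title
    .replace(/[^a-zA-Z0-9\s]/g, '') // drop non-alphanumeric characters
    .trim()
}

console.log(deriveThreadTitle('short message')) // undefined
console.log(deriveThreadTitle('How do I configure the GPU settings\nfor local models in Jan?'))
// "How do I configure the GPU settings for local models in Jan"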
@@ -93,14 +93,8 @@ const ServerLogs = (props: ServerLogsProps) => {
   }, [listRef.current?.scrollHeight, isUserManuallyScrollingUp, logs])

   return (
-    <ScrollArea
-      ref={listRef}
-      className={twMerge(
-        'h-[calc(100%-49px)] w-full p-4 py-0',
-        logs.length === 0 && 'mx-auto'
-      )}
-      onScroll={handleScroll}
-    >
+    <>
+      <div>
       {withCopy && (
         <div className="absolute right-2 top-7">
           <div className="flex w-full flex-row gap-2">
@@ -140,6 +134,15 @@ const ServerLogs = (props: ServerLogsProps) => {
           </div>
         </div>
       )}
+      </div>
+      <ScrollArea
+        ref={listRef}
+        className={twMerge(
+          'h-[calc(100%-49px)] w-full p-4 py-0',
+          logs.length === 0 && 'mx-auto'
+        )}
+        onScroll={handleScroll}
+      >
       <div className="flex h-full w-full flex-col">
         {logs.length > 0 ? (
           <code className="inline-block max-w-[38vw] whitespace-break-spaces text-[13px] lg:max-w-[40vw] xl:max-w-[50vw]">
@@ -155,7 +158,7 @@ const ServerLogs = (props: ServerLogsProps) => {
           <div
             className={twMerge(
               'mt-24 flex w-full flex-col items-center justify-center',
-              withCopy && 'mt-0 py-2'
+              withCopy && 'mt-4 py-2'
             )}
           >
             <svg
@@ -287,11 +290,14 @@ const ServerLogs = (props: ServerLogsProps) => {
               </linearGradient>
             </defs>
           </svg>
-          <p className="text-[hsla(var(--text-secondary)] mt-4">Empty logs</p>
+          <p className="text-[hsla(var(--text-secondary)] mt-4">
+            Empty logs
+          </p>
         </div>
       )}
       </div>
     </ScrollArea>
+    </>
   )
 }
@@ -73,7 +73,7 @@ const SliderRightPanel = ({
   trigger={
     <Input
       type="text"
-      className="-mt-4 h-8 w-[60px]"
+      className="-mt-4 h-8 w-[68px]"
       min={min}
       max={max}
       value={val}
@@ -8,6 +8,8 @@ export const mainViewStateAtom = atom<MainViewState>(MainViewState.Thread)

 export const defaultJanDataFolderAtom = atom<string>('')

+export const LocalEngineDefaultVariantAtom = atom<string>('')
+
 const SHOW_RIGHT_PANEL = 'showRightPanel'

 // Store panel atom
Some files were not shown because too many files have changed in this diff.