Merge pull request #4683 from janhq/chore/sync-release-to-dev

chore: sync release v0.5.15 branch into dev branch

Commit c4d7a143eb

.github/workflows/jan-electron-build-beta.yml (vendored, 40 changes)

@@ -9,31 +9,6 @@ jobs:
   get-update-version:
     uses: ./.github/workflows/template-get-update-version.yml
 
-  create-draft-release:
-    runs-on: ubuntu-latest
-    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
-    outputs:
-      upload_url: ${{ steps.create_release.outputs.upload_url }}
-      version: ${{ steps.get_version.outputs.version }}
-    permissions:
-      contents: write
-    steps:
-      - name: Extract tag name without v prefix
-        id: get_version
-        run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV && echo "::set-output name=version::${GITHUB_REF#refs/tags/v}"
-        env:
-          GITHUB_REF: ${{ github.ref }}
-      - name: Create Draft Release
-        id: create_release
-        uses: softprops/action-gh-release@v2
-        with:
-          tag_name: ${{ github.ref_name }}
-          token: ${{ secrets.GITHUB_TOKEN }}
-          name: "${{ env.VERSION }}"
-          draft: true
-          prerelease: false
-          generate_release_notes: true
-
   build-macos:
     uses: ./.github/workflows/template-build-macos.yml
     secrets: inherit
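Note: with this sync, tagged beta builds no longer create a GitHub draft release. The entire create-draft-release job is dropped, and the jobs that consumed its version output are rewired to get-update-version in the hunks below.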
@@ -65,7 +40,7 @@ jobs:
       beta: true
 
   sync-temp-to-latest:
-    needs: [build-macos, create-draft-release, build-windows-x64, build-linux-x64]
+    needs: [build-macos, build-windows-x64, build-linux-x64]
     runs-on: ubuntu-latest
     permissions:
       contents: write

@@ -82,19 +57,15 @@ jobs:
       AWS_DEFAULT_REGION: ${{ secrets.DELTA_AWS_REGION }}
       AWS_EC2_METADATA_DISABLED: "true"
 
-      - name: set release to prerelease
-        run: |
-          gh release edit v${{ needs.create-draft-release.outputs.version }} --draft=false --prerelease
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
   noti-discord-and-update-url-readme:
-    needs: [build-macos, create-draft-release, build-windows-x64, build-linux-x64, sync-temp-to-latest]
+    needs: [build-macos, get-update-version, build-windows-x64, build-linux-x64, sync-temp-to-latest]
     runs-on: ubuntu-latest
     steps:
      - name: Set version to environment variable
        run: |
-          echo "VERSION=${{ needs.create-draft-release.outputs.version }}" >> $GITHUB_ENV
+          VERSION=${{ needs.get-update-version.outputs.new_version }}
+          VERSION="${VERSION#v}"
+          echo "VERSION=$VERSION" >> $GITHUB_ENV
 
       - name: Notify Discord
         uses: Ilshidur/action-discord@master
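Note: get-update-version's new_version output can carry a leading "v" (it is tag-derived), so the step now normalizes it with the POSIX parameter expansion ${VERSION#v}, which strips one leading "v" if present, before exporting VERSION for the Discord notification below.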
@@ -105,6 +76,5 @@ jobs:
       - macOS Universal: https://delta.jan.ai/beta/jan-beta-mac-universal-{{ VERSION }}.dmg
       - Linux Deb: https://delta.jan.ai/beta/jan-beta-linux-amd64-{{ VERSION }}.deb
       - Linux AppImage: https://delta.jan.ai/beta/jan-beta-linux-x86_64-{{ VERSION }}.AppImage
-      - Github Release URL: https://github.com/janhq/jan/releases/tag/v{{ VERSION }}
       env:
         DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_JAN_BETA }}
.github/workflows/template-build-jan-server.yml (vendored, 39 changes, file deleted)

@@ -1,39 +0,0 @@
-name: build-jan-server
-on:
-  workflow_call:
-    inputs:
-      dockerfile_path:
-        required: false
-        type: string
-        default: './Dockerfile'
-      docker_image_tag:
-        required: true
-        type: string
-        default: 'ghcr.io/janhq/jan-server:dev-latest'
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    env:
-      REGISTRY: ghcr.io
-      IMAGE_NAME: janhq/jan-server
-    permissions:
-      packages: write
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Log in to the Container registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Build and push Docker image
-        uses: docker/build-push-action@v3
-        with:
-          context: .
-          file: ${{ inputs.dockerfile_path }}
-          push: true
-          tags: ${{ inputs.docker_image_tag }}
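Note: the standalone jan-server Docker build template (the GHCR image build) is deleted outright in this sync.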
(Several hunks below were captured without their file headers; they follow the original diff order.)

@@ -83,7 +83,7 @@ jobs:
           cat ./electron/package.json
           echo "------------------------"
           cat ./package.json
-          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
+          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
           cat electron/package.json
 
.github/workflows/template-build-macos.yml (vendored, 2 changes)

@@ -99,7 +99,7 @@ jobs:
           cat ./electron/package.json
           echo "------------------------"
           cat ./package.json
-          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
+          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
           cat electron/package.json
 

@@ -108,7 +108,7 @@ jobs:
           cat ./package.json
           echo "------------------------"
           cat ./electron/scripts/uninstaller.nsh
-          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
+          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
           cat electron/package.json
 
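Note: the same one-line change is applied to each build template: the "github" provider is removed from electron-builder's build.publish array, so beta artifacts are published only to the generic https://delta.jan.ai/beta endpoint and the S3 temp-beta path, consistent with dropping the draft-release job above.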
@@ -12,6 +12,7 @@ export enum ExtensionTypeEnum {
   SystemMonitoring = 'systemMonitoring',
   HuggingFace = 'huggingFace',
   Engine = 'engine',
+  Hardware = 'hardware',
 }
 
 export interface ExtensionType {
@@ -38,8 +38,14 @@ describe('OAIEngine', () => {
 
   it('should subscribe to events on load', () => {
     engine.onLoad()
-    expect(events.on).toHaveBeenCalledWith(MessageEvent.OnMessageSent, expect.any(Function))
-    expect(events.on).toHaveBeenCalledWith(InferenceEvent.OnInferenceStopped, expect.any(Function))
+    expect(events.on).toHaveBeenCalledWith(
+      MessageEvent.OnMessageSent,
+      expect.any(Function)
+    )
+    expect(events.on).toHaveBeenCalledWith(
+      InferenceEvent.OnInferenceStopped,
+      expect.any(Function)
+    )
   })
 
   it('should handle inference request', async () => {

@@ -77,7 +83,12 @@ describe('OAIEngine', () => {
     expect(events.emit).toHaveBeenCalledWith(
       MessageEvent.OnMessageUpdate,
       expect.objectContaining({
-        content: [{ type: ContentType.Text, text: { value: 'test response', annotations: [] } }],
+        content: [
+          {
+            type: ContentType.Text,
+            text: { value: 'test response', annotations: [] },
+          },
+        ],
         status: MessageStatus.Ready,
       })
     )

@@ -101,11 +112,10 @@ describe('OAIEngine', () => {
 
     await engine.inference(data)
 
-    expect(events.emit).toHaveBeenCalledWith(
+    expect(events.emit).toHaveBeenLastCalledWith(
      MessageEvent.OnMessageUpdate,
       expect.objectContaining({
-        content: [{ type: ContentType.Text, text: { value: 'test error', annotations: [] } }],
-        status: MessageStatus.Error,
+        status: 'error',
         error_code: 500,
       })
     )
@@ -42,7 +42,9 @@ export abstract class OAIEngine extends AIEngine {
    */
   override onLoad() {
     super.onLoad()
-    events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => this.inference(data))
+    events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
+      this.inference(data)
+    )
     events.on(InferenceEvent.OnInferenceStopped, () => this.stopInference())
   }
 

@@ -128,7 +130,9 @@ export abstract class OAIEngine extends AIEngine {
         events.emit(MessageEvent.OnMessageUpdate, message)
       },
       complete: async () => {
-        message.status = message.content.length ? MessageStatus.Ready : MessageStatus.Error
+        message.status = message.content.length
+          ? MessageStatus.Ready
+          : MessageStatus.Error
         events.emit(MessageEvent.OnMessageUpdate, message)
       },
       error: async (err: any) => {

@@ -141,7 +145,10 @@ export abstract class OAIEngine extends AIEngine {
         message.content[0] = {
           type: ContentType.Text,
           text: {
-            value: err.message,
+            value:
+              typeof message === 'string'
+                ? err.message
+                : (JSON.stringify(err.message) ?? err.detail),
             annotations: [],
           },
         }
@@ -1,14 +1,17 @@
 import { lastValueFrom, Observable } from 'rxjs'
 import { requestInference } from './sse'
 
-import { ReadableStream } from 'stream/web';
+import { ReadableStream } from 'stream/web'
 describe('requestInference', () => {
   it('should send a request to the inference server and return an Observable', () => {
     // Mock the fetch function
     const mockFetch: any = jest.fn(() =>
       Promise.resolve({
         ok: true,
-        json: () => Promise.resolve({ choices: [{ message: { content: 'Generated response' } }] }),
+        json: () =>
+          Promise.resolve({
+            choices: [{ message: { content: 'Generated response' } }],
+          }),
         headers: new Headers(),
         redirected: false,
         status: 200,

@@ -36,7 +39,10 @@ describe('requestInference', () => {
     const mockFetch: any = jest.fn(() =>
       Promise.resolve({
         ok: false,
-        json: () => Promise.resolve({ error: { message: 'Wrong API Key', code: 'invalid_api_key' } }),
+        json: () =>
+          Promise.resolve({
+            error: { message: 'Invalid API Key.', code: 'invalid_api_key' },
+          }),
         headers: new Headers(),
         redirected: false,
         status: 401,

@@ -56,16 +62,22 @@ describe('requestInference', () => {
 
     // Assert the expected behavior
     expect(result).toBeInstanceOf(Observable)
-    expect(lastValueFrom(result)).rejects.toEqual({ message: 'Wrong API Key', code: 'invalid_api_key' })
+    expect(lastValueFrom(result)).rejects.toEqual({
+      message: 'Invalid API Key.',
+      code: 'invalid_api_key',
+    })
   })
 })
 
 it('should handle a successful response with a transformResponse function', () => {
   // Mock the fetch function
   const mockFetch: any = jest.fn(() =>
     Promise.resolve({
       ok: true,
-      json: () => Promise.resolve({ choices: [{ message: { content: 'Generated response' } }] }),
+      json: () =>
+        Promise.resolve({
+          choices: [{ message: { content: 'Generated response' } }],
+        }),
       headers: new Headers(),
       redirected: false,
       status: 200,

@@ -78,47 +90,57 @@ describe('requestInference', () => {
   const inferenceUrl = 'https://inference-server.com'
   const requestBody = { message: 'Hello' }
   const model = { id: 'model-id', parameters: { stream: false } }
-  const transformResponse = (data: any) => data.choices[0].message.content.toUpperCase()
+  const transformResponse = (data: any) =>
+    data.choices[0].message.content.toUpperCase()
 
   // Call the function
-  const result = requestInference(inferenceUrl, requestBody, model, undefined, undefined, transformResponse)
+  const result = requestInference(
+    inferenceUrl,
+    requestBody,
+    model,
+    undefined,
+    undefined,
+    transformResponse
+  )
 
   // Assert the expected behavior
   expect(result).toBeInstanceOf(Observable)
   expect(lastValueFrom(result)).resolves.toEqual('GENERATED RESPONSE')
 })
 
 it('should handle a successful response with streaming enabled', () => {
   // Mock the fetch function
   const mockFetch: any = jest.fn(() =>
     Promise.resolve({
       ok: true,
       body: new ReadableStream({
         start(controller) {
-          controller.enqueue(new TextEncoder().encode('data: {"choices": [{"delta": {"content": "Streamed"}}]}'));
-          controller.enqueue(new TextEncoder().encode('data: [DONE]'));
-          controller.close();
-        }
+          controller.enqueue(
+            new TextEncoder().encode(
+              'data: {"choices": [{"delta": {"content": "Streamed"}}]}'
+            )
+          )
+          controller.enqueue(new TextEncoder().encode('data: [DONE]'))
+          controller.close()
+        },
       }),
       headers: new Headers(),
       redirected: false,
       status: 200,
       statusText: 'OK',
     })
-  );
-  jest.spyOn(global, 'fetch').mockImplementation(mockFetch);
+  )
+  jest.spyOn(global, 'fetch').mockImplementation(mockFetch)
 
   // Define the test inputs
-  const inferenceUrl = 'https://inference-server.com';
-  const requestBody = { message: 'Hello' };
-  const model = { id: 'model-id', parameters: { stream: true } };
+  const inferenceUrl = 'https://inference-server.com'
+  const requestBody = { message: 'Hello' }
+  const model = { id: 'model-id', parameters: { stream: true } }
 
   // Call the function
-  const result = requestInference(inferenceUrl, requestBody, model);
+  const result = requestInference(inferenceUrl, requestBody, model)
 
   // Assert the expected behavior
-  expect(result).toBeInstanceOf(Observable);
-  expect(lastValueFrom(result)).resolves.toEqual('Streamed');
-});
+  expect(result).toBeInstanceOf(Observable)
+  expect(lastValueFrom(result)).resolves.toEqual('Streamed')
+})
 
@@ -32,21 +32,20 @@ export function requestInference(
     })
       .then(async (response) => {
         if (!response.ok) {
-          const data = await response.json()
-          let errorCode = ErrorCode.Unknown
-          if (data.error) {
-            errorCode = data.error.code ?? data.error.type ?? ErrorCode.Unknown
-          } else if (response.status === 401) {
-            errorCode = ErrorCode.InvalidApiKey
-          }
-          const error = {
-            message: data.error?.message ?? data.message ?? 'Error occurred.',
-            code: errorCode,
-          }
-          subscriber.error(error)
-          subscriber.complete()
-          return
+          if (response.status === 401) {
+            throw {
+              code: ErrorCode.InvalidApiKey,
+              message: 'Invalid API Key.',
+            }
+          }
+          let data = await response.json()
+          try {
+            handleError(data)
+          } catch (err) {
+            subscriber.error(err)
+            return
+          }
         }
         // There could be overriden stream parameter in the model
         // that is set in request body (transformed payload)
         if (

@@ -54,9 +53,10 @@ export function requestInference(
           model.parameters?.stream === false
         ) {
           const data = await response.json()
-          if (data.error || data.message) {
-            subscriber.error(data.error ?? data)
-            subscriber.complete()
+          try {
+            handleError(data)
+          } catch (err) {
+            subscriber.error(err)
             return
           }
           if (transformResponse) {

@@ -91,13 +91,10 @@ export function requestInference(
             const toParse = cachedLines + line
             if (!line.includes('data: [DONE]')) {
               const data = JSON.parse(toParse.replace('data: ', ''))
-              if (
-                'error' in data ||
-                'message' in data ||
-                'detail' in data
-              ) {
-                subscriber.error(data.error ?? data)
-                subscriber.complete()
+              try {
+                handleError(data)
+              } catch (err) {
+                subscriber.error(err)
                 return
               }
               content += data.choices[0]?.delta?.content ?? ''

@@ -118,3 +115,18 @@ export function requestInference(
       .catch((err) => subscriber.error(err))
   })
 }
+
+/**
+ * Handle error and normalize it to a common format.
+ * @param data
+ */
+const handleError = (data: any) => {
+  if (
+    data.error ||
+    data.message ||
+    data.detail ||
+    (Array.isArray(data) && data.length && data[0].error)
+  ) {
+    throw data.error ?? data[0]?.error ?? data
+  }
+}
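The new handleError helper consolidates the three inline error checks in requestInference: it throws whenever a parsed payload looks like an error (an error, message, or detail field, or an array whose first element carries an error), and each call site turns that throw into subscriber.error(...) and stops. A sketch of the shapes it reacts to; the sample payloads are illustrative, not from the commit:

    // Illustrative payloads; behavior follows the handleError guard above.
    handleError({ choices: [{ delta: { content: 'hi' } }] }) // no error markers: returns silently

    try {
      handleError({ error: { message: 'Wrong API Key', code: 'invalid_api_key' } })
    } catch (err) {
      // err is the inner error object: { message: 'Wrong API Key', code: 'invalid_api_key' }
    }

    try {
      handleError([{ error: { message: 'batch item failed' } }]) // array form
    } catch (err) {
      // err is data[0].error: { message: 'batch item failed' }
    }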
core/src/browser/extensions/hardwareManagement.ts (new file, 26 lines)

@@ -0,0 +1,26 @@
+import { HardwareInformation } from '../../types'
+import { BaseExtension, ExtensionTypeEnum } from '../extension'
+
+/**
+ * Engine management extension. Persists and retrieves engine management.
+ * @abstract
+ * @extends BaseExtension
+ */
+export abstract class HardwareManagementExtension extends BaseExtension {
+  type(): ExtensionTypeEnum | undefined {
+    return ExtensionTypeEnum.Hardware
+  }
+
+  /**
+   * @returns A Promise that resolves to an object of list hardware.
+   */
+  abstract getHardware(): Promise<HardwareInformation>
+
+  /**
+   * @returns A Promise that resolves to an object of set active gpus.
+   */
+  abstract setAvtiveGpu(data: { gpus: number[] }): Promise<{
+    message: string
+    activated_gpus: number[]
+  }>
+}
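The abstract class lands as-committed, including a doc comment copy-pasted from the engine-management extension and the misspelled setAvtiveGpu, which concrete subclasses must therefore spell the same way. A minimal subclass might look like the sketch below; the class name and stub values are hypothetical, and it assumes BaseExtension's usual onLoad/onUnload hooks:

    import { HardwareManagementExtension, HardwareInformation } from '@janhq/core'

    // Hypothetical illustration; a real extension would query the backend process.
    class StubHardwareExtension extends HardwareManagementExtension {
      onLoad() {}
      onUnload() {}

      async getHardware(): Promise<HardwareInformation> {
        return {
          cpu: { arch: 'x64', cores: 8, instructions: ['AVX2'], model: 'stub-cpu', usage: 0 },
          gpus: [],
          os: { name: 'linux', version: '6.1' },
          power: { battery_life: 100, charging_status: 'full', is_power_saving: false },
          ram: { available: 8192, total: 16384, type: 'DDR4' },
          storage: { available: 100, total: 512, type: 'SSD' },
        }
      }

      // The override must match the upstream (misspelled) abstract method name.
      async setAvtiveGpu(data: { gpus: number[] }) {
        return { message: 'OK', activated_gpus: data.gpus }
      }
    }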
@@ -1,6 +1,5 @@
 import { ConversationalExtension } from './index';
 import { InferenceExtension } from './index';
-import { MonitoringExtension } from './index';
 import { AssistantExtension } from './index';
 import { ModelExtension } from './index';
 import * as Engines from './index';

@@ -14,10 +13,6 @@ describe('index.ts exports', () => {
     expect(InferenceExtension).toBeDefined();
   });
 
-  test('should export MonitoringExtension', () => {
-    expect(MonitoringExtension).toBeDefined();
-  });
-
   test('should export AssistantExtension', () => {
     expect(AssistantExtension).toBeDefined();
   });

@@ -9,10 +9,7 @@ export { ConversationalExtension } from './conversational'
  */
 export { InferenceExtension } from './inference'
 
-/**
- * Monitoring extension for system monitoring.
- */
-export { MonitoringExtension } from './monitoring'
-
 /**
  * Assistant extension for managing assistants.

@@ -33,3 +30,8 @@ export * from './engines'
  * Engines Management
  */
 export * from './enginesManagement'
+
+/**
+ * Hardware Management
+ */
+export * from './hardwareManagement'
@@ -1,42 +0,0 @@
-
-import { ExtensionTypeEnum } from '../extension';
-import { MonitoringExtension } from './monitoring';
-
-it('should have the correct type', () => {
-  class TestMonitoringExtension extends MonitoringExtension {
-    getGpuSetting(): Promise<GpuSetting | undefined> {
-      throw new Error('Method not implemented.');
-    }
-    getResourcesInfo(): Promise<any> {
-      throw new Error('Method not implemented.');
-    }
-    getCurrentLoad(): Promise<any> {
-      throw new Error('Method not implemented.');
-    }
-    getOsInfo(): Promise<OperatingSystemInfo> {
-      throw new Error('Method not implemented.');
-    }
-  }
-  const monitoringExtension = new TestMonitoringExtension();
-  expect(monitoringExtension.type()).toBe(ExtensionTypeEnum.SystemMonitoring);
-});
-
-
-it('should create an instance of MonitoringExtension', () => {
-  class TestMonitoringExtension extends MonitoringExtension {
-    getGpuSetting(): Promise<GpuSetting | undefined> {
-      throw new Error('Method not implemented.');
-    }
-    getResourcesInfo(): Promise<any> {
-      throw new Error('Method not implemented.');
-    }
-    getCurrentLoad(): Promise<any> {
-      throw new Error('Method not implemented.');
-    }
-    getOsInfo(): Promise<OperatingSystemInfo> {
-      throw new Error('Method not implemented.');
-    }
-  }
-  const monitoringExtension = new TestMonitoringExtension();
-  expect(monitoringExtension).toBeInstanceOf(MonitoringExtension);
-});

@@ -1,20 +0,0 @@
-import { BaseExtension, ExtensionTypeEnum } from '../extension'
-import { GpuSetting, MonitoringInterface, OperatingSystemInfo } from '../../types'
-
-/**
- * Monitoring extension for system monitoring.
- * @extends BaseExtension
- */
-export abstract class MonitoringExtension extends BaseExtension implements MonitoringInterface {
-  /**
-   * Monitoring extension type.
-   */
-  type(): ExtensionTypeEnum | undefined {
-    return ExtensionTypeEnum.SystemMonitoring
-  }
-
-  abstract getGpuSetting(): Promise<GpuSetting | undefined>
-  abstract getResourcesInfo(): Promise<any>
-  abstract getCurrentLoad(): Promise<any>
-  abstract getOsInfo(): Promise<OperatingSystemInfo>
-}
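Note: taken together with the additions above, this commit retires the system-monitoring extension type: its abstract class, tests, exports, and (further below) its type definitions are all removed, with GPU and OS queries evidently moving to the new hardware-management extension.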
@@ -1,4 +1,5 @@
 export type AppConfiguration = {
   data_folder: string
   quick_ask: boolean
+  distinct_id?: string
 }

@@ -18,6 +18,7 @@ export type EngineMetadata = {
       template?: string
     }
   }
+  explore_models_url?: string
 }
 
 export type EngineVariant = {
core/src/types/hardware/index.ts (new file, 55 lines)

@@ -0,0 +1,55 @@
+export type Cpu = {
+  arch: string
+  cores: number
+  instructions: string[]
+  model: string
+  usage: number
+}
+
+export type GpuAdditionalInformation = {
+  compute_cap: string
+  driver_version: string
+}
+
+export type Gpu = {
+  activated: boolean
+  additional_information?: GpuAdditionalInformation
+  free_vram: number
+  id: string
+  name: string
+  total_vram: number
+  uuid: string
+  version: string
+}
+
+export type Os = {
+  name: string
+  version: string
+}
+
+export type Power = {
+  battery_life: number
+  charging_status: string
+  is_power_saving: boolean
+}
+
+export type Ram = {
+  available: number
+  total: number
+  type: string
+}
+
+export type Storage = {
+  available: number
+  total: number
+  type: string
+}
+
+export type HardwareInformation = {
+  cpu: Cpu
+  gpus: Gpu[]
+  os: Os
+  power: Power
+  ram: Ram
+  storage: Storage
+}
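For reference, a populated Gpu entry under these types might look like the following; all values are illustrative, and the types do not pin down units for free_vram/total_vram:

    import type { Gpu } from '@janhq/core'

    // Illustrative values only; the field set follows the Gpu type above.
    const exampleGpu: Gpu = {
      activated: true,
      additional_information: { compute_cap: '8.9', driver_version: '550.54' },
      free_vram: 20480, // unit not specified by the type
      id: '0',
      name: 'NVIDIA GeForce RTX 4090',
      total_vram: 24576,
      uuid: 'GPU-00000000-0000-0000-0000-000000000000',
      version: '12.3',
    }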
@@ -4,7 +4,6 @@ import * as model from './model';
 import * as thread from './thread';
 import * as message from './message';
 import * as inference from './inference';
-import * as monitoring from './monitoring';
 import * as file from './file';
 import * as config from './config';
 import * as huggingface from './huggingface';

@@ -18,7 +17,6 @@ import * as setting from './setting';
   expect(thread).toBeDefined();
   expect(message).toBeDefined();
   expect(inference).toBeDefined();
-  expect(monitoring).toBeDefined();
   expect(file).toBeDefined();
   expect(config).toBeDefined();
   expect(huggingface).toBeDefined();

@@ -3,7 +3,6 @@ export * from './model'
 export * from './thread'
 export * from './message'
 export * from './inference'
-export * from './monitoring'
 export * from './file'
 export * from './config'
 export * from './huggingface'

@@ -11,3 +10,4 @@ export * from './miscellaneous'
 export * from './api'
 export * from './setting'
 export * from './engine'
+export * from './hardware'
@@ -1,33 +1,25 @@
+import { GpuAdditionalInformation } from '../hardware'
+
 export type SystemResourceInfo = {
   memAvailable: number
 }
 
-export type RunMode = 'cpu' | 'gpu'
 
 export type GpuSetting = {
-  notify: boolean
-  run_mode: RunMode
-  nvidia_driver: {
-    exist: boolean
-    version: string
-  }
-  cuda: {
-    exist: boolean
-    version: string
-  }
   gpus: GpuSettingInfo[]
-  gpu_highest_vram: string
-  gpus_in_use: string[]
-  is_initial: boolean
   // TODO: This needs to be set based on user toggle in settings
   vulkan: boolean
+  cpu?: any
 }
 
 export type GpuSettingInfo = {
+  activated: boolean
+  free_vram: number
   id: string
-  vram: string
   name: string
-  arch?: string
+  total_vram: number
+  uuid: string
+  version: string
+  additional_information?: GpuAdditionalInformation
 }
 
 export type SystemInformation = {

@@ -42,9 +34,6 @@ export type SupportedPlatform = SupportedPlatformTuple[number]
 export type OperatingSystemInfo = {
   platform: SupportedPlatform | 'unknown'
   arch: string
-  release: string
-  machine: string
-  version: string
   totalMem: number
   freeMem: number
 }
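Note: GpuSettingInfo is reshaped to mirror the new Gpu hardware type (activated, numeric free_vram/total_vram, uuid, version, optional additional_information), replacing the old string-valued vram and arch? fields; the NVIDIA-driver and CUDA details dropped from GpuSetting presumably now come from the hardware extension instead.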
@@ -71,7 +71,7 @@ export type Model = {
   /**
    * The model identifier, modern version of id.
    */
-  mode?: string
+  model?: string
 
   /**
    * Human-readable name that is used for UI.

@@ -150,6 +150,7 @@ export type ModelSettingParams = {
  */
 export type ModelRuntimeParams = {
   temperature?: number
+  max_temperature?: number
   token_limit?: number
   top_k?: number
   top_p?: number
@@ -1,13 +0,0 @@
-import * as monitoringInterface from './monitoringInterface'
-import * as resourceInfo from './resourceInfo'
-
-import * as index from './index'
-
-it('should re-export all symbols from monitoringInterface and resourceInfo', () => {
-  for (const key in monitoringInterface) {
-    expect(index[key]).toBe(monitoringInterface[key])
-  }
-  for (const key in resourceInfo) {
-    expect(index[key]).toBe(resourceInfo[key])
-  }
-})

@@ -1,2 +0,0 @@
-export * from './monitoringInterface'
-export * from './resourceInfo'

@@ -1,29 +0,0 @@
-import { GpuSetting, OperatingSystemInfo } from '../miscellaneous'
-
-/**
- * Monitoring extension for system monitoring.
- * @extends BaseExtension
- */
-export interface MonitoringInterface {
-  /**
-   * Returns information about the system resources.
-   * @returns {Promise<any>} A promise that resolves with the system resources information.
-   */
-  getResourcesInfo(): Promise<any>
-
-  /**
-   * Returns the current system load.
-   * @returns {Promise<any>} A promise that resolves with the current system load.
-   */
-  getCurrentLoad(): Promise<any>
-
-  /**
-   * Returns the GPU configuration.
-   */
-  getGpuSetting(): Promise<GpuSetting | undefined>
-
-  /**
-   * Returns information about the operating system.
-   */
-  getOsInfo(): Promise<OperatingSystemInfo>
-}

@@ -1,6 +0,0 @@
-export type ResourceInfo = {
-  mem: {
-    totalMemory: number
-    usedMemory: number
-  }
-}
@@ -55,7 +55,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
     "productName": "Conversational",
     "version": "1.0.0",
     "main": "dist/index.js",
-    "description": "This extension enables conversations and state persistence via your filesystem",
+    "description": "This extension enables conversations and state persistence via your filesystem.",
     "url": "extension://@janhq/conversational-extension/dist/index.js"
   },
   "@janhq/inference-anthropic-extension": {

@@ -70,7 +70,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
     "productName": "Anthropic Inference Engine",
     "version": "1.0.2",
     "main": "dist/index.js",
-    "description": "This extension enables Anthropic chat completion API calls",
+    "description": "This extension enables Anthropic chat completion API calls.",
     "url": "extension://@janhq/inference-anthropic-extension/dist/index.js"
   },
   "@janhq/inference-triton-trt-llm-extension": {

@@ -85,7 +85,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
     "productName": "Triton-TRT-LLM Inference Engine",
     "version": "1.0.0",
     "main": "dist/index.js",
-    "description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option",
+    "description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option.",
     "url": "extension://@janhq/inference-triton-trt-llm-extension/dist/index.js"
   },
   "@janhq/inference-mistral-extension": {

@@ -100,7 +100,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
     "productName": "MistralAI Inference Engine",
     "version": "1.0.1",
     "main": "dist/index.js",
-    "description": "This extension enables Mistral chat completion API calls",
+    "description": "This extension enables Mistral chat completion API calls.",
     "url": "extension://@janhq/inference-mistral-extension/dist/index.js"
   },
   "@janhq/inference-martian-extension": {

@@ -115,7 +115,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
     "productName": "Martian Inference Engine",
     "version": "1.0.1",
     "main": "dist/index.js",
-    "description": "This extension enables Martian chat completion API calls",
+    "description": "This extension enables Martian chat completion API calls.",
     "url": "extension://@janhq/inference-martian-extension/dist/index.js"
   },
   "@janhq/inference-openrouter-extension": {

@@ -130,7 +130,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
     "productName": "OpenRouter Inference Engine",
     "version": "1.0.0",
     "main": "dist/index.js",
-    "description": "This extension enables Open Router chat completion API calls",
+    "description": "This extension enables Open Router chat completion API calls.",
     "url": "extension://@janhq/inference-openrouter-extension/dist/index.js"
   },
   "@janhq/inference-nvidia-extension": {

@@ -145,7 +145,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
     "productName": "NVIDIA NIM Inference Engine",
     "version": "1.0.1",
     "main": "dist/index.js",
-    "description": "This extension enables NVIDIA chat completion API calls",
+    "description": "This extension enables NVIDIA chat completion API calls.",
     "url": "extension://@janhq/inference-nvidia-extension/dist/index.js"
   },
   "@janhq/inference-groq-extension": {

@@ -160,7 +160,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
     "productName": "Groq Inference Engine",
     "version": "1.0.1",
     "main": "dist/index.js",
-    "description": "This extension enables fast Groq chat completion API calls",
+    "description": "This extension enables fast Groq chat completion API calls.",
     "url": "extension://@janhq/inference-groq-extension/dist/index.js"
   },
   "@janhq/inference-openai-extension": {

@@ -175,7 +175,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
     "productName": "OpenAI Inference Engine",
     "version": "1.0.2",
     "main": "dist/index.js",
-    "description": "This extension enables OpenAI chat completion API calls",
+    "description": "This extension enables OpenAI chat completion API calls.",
     "url": "extension://@janhq/inference-openai-extension/dist/index.js"
   },
   "@janhq/inference-cohere-extension": {

@@ -190,7 +190,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
     "productName": "Cohere Inference Engine",
     "version": "1.0.0",
     "main": "dist/index.js",
-    "description": "This extension enables Cohere chat completion API calls",
+    "description": "This extension enables Cohere chat completion API calls.",
     "url": "extension://@janhq/inference-cohere-extension/dist/index.js"
   },
   "@janhq/model-extension": {

@@ -205,7 +205,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
     "productName": "Model Management",
     "version": "1.0.33",
     "main": "dist/index.js",
-    "description": "Model Management Extension provides model exploration and seamless downloads",
+    "description": "Model Management Extension provides model exploration and seamless downloads.",
     "url": "extension://@janhq/model-extension/dist/index.js"
   },
   "@janhq/monitoring-extension": {

@@ -220,7 +220,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
     "productName": "System Monitoring",
     "version": "1.0.10",
     "main": "dist/index.js",
-    "description": "This extension provides system health and OS level data",
+    "description": "This extension provides system health and OS level data.",
     "url": "extension://@janhq/monitoring-extension/dist/index.js"
   },
   "@janhq/assistant-extension": {

@@ -235,7 +235,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
     "productName": "Jan Assistant",
     "version": "1.0.1",
     "main": "dist/index.js",
-    "description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models",
+    "description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models.",
     "url": "extension://@janhq/assistant-extension/dist/index.js"
   },
   "@janhq/tensorrt-llm-extension": {
@@ -47,8 +47,8 @@ To add a new remote engine:
 |-------|-------------|----------|
 | Engine Name | Name for your engine (e.g., "OpenAI", "Claude") | ✓ |
 | API URL | The base URL of the provider's API | ✓ |
-| API Key | Your authentication key from the provider | ✓ |
-| Model List URL | URL for fetching available models | |
+| API Key | Your authentication key to activate this engine | ✓ |
+| Model List URL | The endpoint URL to fetch available models | |
 | API Key Template | Custom authorization header format | |
 | Request Format Conversion | Function to convert Jan's request format to provider's format | |
 | Response Format Conversion | Function to convert provider's response format to Jan's format | |
@@ -28,6 +28,7 @@ import { setupReactDevTool } from './utils/dev'
 import { trayManager } from './managers/tray'
 import { logSystemInfo } from './utils/system'
 import { registerGlobalShortcuts } from './utils/shortcut'
+import { registerLogger } from './utils/logger'
 
 const preloadPath = join(__dirname, 'preload.js')
 const rendererPath = join(__dirname, '..', 'renderer')

@@ -79,6 +80,7 @@ app
   })
   .then(setupCore)
   .then(createUserSpace)
+  .then(registerLogger)
   .then(migrate)
   .then(setupExtensions)
   .then(setupMenu)
@@ -1,6 +1,6 @@
 {
   "name": "jan",
-  "version": "0.1.4",
+  "version": "0.1.1737985524",
   "main": "./build/main.js",
   "author": "Jan <service@jan.ai>",
   "license": "MIT",
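Note: the new version value looks like an epoch-seconds build stamp (1737985524 corresponds to late January 2025) rather than a semantic version, presumably injected by the release tooling during the sync.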
@@ -1,16 +1,28 @@
-import fs from 'fs'
+import {
+  createWriteStream,
+  existsSync,
+  mkdirSync,
+  readdir,
+  stat,
+  unlink,
+  writeFileSync,
+} from 'fs'
 import util from 'util'
 import {
   getAppConfigurations,
   getJanDataFolderPath,
   Logger,
+  LoggerManager,
 } from '@janhq/core/node'
 import path, { join } from 'path'
 
-export class FileLogger extends Logger {
+/**
+ * File Logger
+ */
+export class FileLogger implements Logger {
   name = 'file'
   logCleaningInterval: number = 120000
-  timeout: NodeJS.Timeout | null = null
+  timeout: NodeJS.Timeout | undefined
   appLogPath: string = './'
   logEnabled: boolean = true

@@ -18,14 +30,13 @@ export class FileLogger extends Logger {
     logEnabled: boolean = true,
     logCleaningInterval: number = 120000
   ) {
-    super()
     this.logEnabled = logEnabled
     if (logCleaningInterval) this.logCleaningInterval = logCleaningInterval
 
     const appConfigurations = getAppConfigurations()
     const logFolderPath = join(appConfigurations.data_folder, 'logs')
-    if (!fs.existsSync(logFolderPath)) {
-      fs.mkdirSync(logFolderPath, { recursive: true })
+    if (!existsSync(logFolderPath)) {
+      mkdirSync(logFolderPath, { recursive: true })
     }
 
     this.appLogPath = join(logFolderPath, 'app.log')

@@ -69,8 +80,8 @@ export class FileLogger extends Logger {
     const logDirectory = path.join(getJanDataFolderPath(), 'logs')
     // Perform log cleaning
     const currentDate = new Date()
-    if (fs.existsSync(logDirectory))
-      fs.readdir(logDirectory, (err, files) => {
+    if (existsSync(logDirectory))
+      readdir(logDirectory, (err, files) => {
         if (err) {
           console.error('Error reading log directory:', err)
           return

@@ -78,7 +89,7 @@ export class FileLogger extends Logger {
 
         files.forEach((file) => {
           const filePath = path.join(logDirectory, file)
-          fs.stat(filePath, (err, stats) => {
+          stat(filePath, (err, stats) => {
             if (err) {
               console.error('Error getting file stats:', err)
               return

@@ -86,7 +97,7 @@ export class FileLogger extends Logger {
 
             // Check size
             if (stats.size > size) {
-              fs.unlink(filePath, (err) => {
+              unlink(filePath, (err) => {
                 if (err) {
                   console.error('Error deleting log file:', err)
                   return

@@ -103,7 +114,7 @@ export class FileLogger extends Logger {
                   (1000 * 3600 * 24)
               )
               if (daysDifference > days) {
-                fs.unlink(filePath, (err) => {
+                unlink(filePath, (err) => {
                   if (err) {
                     console.error('Error deleting log file:', err)
                     return

@@ -124,15 +135,20 @@ export class FileLogger extends Logger {
   }
 }
 
+/**
+ * Write log function implementation
+ * @param message
+ * @param logPath
+ */
 const writeLog = (message: string, logPath: string) => {
-  if (!fs.existsSync(logPath)) {
+  if (!existsSync(logPath)) {
     const logDirectory = path.join(getJanDataFolderPath(), 'logs')
-    if (!fs.existsSync(logDirectory)) {
-      fs.mkdirSync(logDirectory)
+    if (!existsSync(logDirectory)) {
+      mkdirSync(logDirectory)
     }
-    fs.writeFileSync(logPath, message)
+    writeFileSync(logPath, message)
   } else {
-    const logFile = fs.createWriteStream(logPath, {
+    const logFile = createWriteStream(logPath, {
       flags: 'a',
     })
     logFile.write(util.format(message) + '\n')

@@ -140,3 +156,12 @@ const writeLog = (message: string, logPath: string) => {
     console.debug(message)
   }
 }
+
+/**
+ * Register logger for global application logging
+ */
+export const registerLogger = () => {
+  const logger = new FileLogger()
+  LoggerManager.instance().register(logger)
+  logger.cleanLogs()
+}
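registerLogger, added above, is what the new .then(registerLogger) step in the Electron startup chain invokes: it creates the file logger, registers it with core's LoggerManager singleton, and starts the periodic cleanup. A standalone equivalent using only the APIs visible in this diff:

    import { LoggerManager } from '@janhq/core/node'
    import { FileLogger } from './utils/logger'

    // Constructor defaults written out explicitly for clarity.
    const logger = new FileLogger(true /* logEnabled */, 120000 /* cleaning interval, ms */)
    LoggerManager.instance().register(logger) // route app-wide logging through the file sink
    logger.cleanLogs() // prune log files by size/age on the configured interval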
@@ -6,6 +6,8 @@ import groq from './resources/groq.json' with { type: 'json' }
 import martian from './resources/martian.json' with { type: 'json' }
 import mistral from './resources/mistral.json' with { type: 'json' }
 import nvidia from './resources/nvidia.json' with { type: 'json' }
+import deepseek from './resources/deepseek.json' with { type: 'json' }
+import googleGemini from './resources/google_gemini.json' with { type: 'json' }

 import anthropicModels from './models/anthropic.json' with { type: 'json' }
 import cohereModels from './models/cohere.json' with { type: 'json' }
@@ -15,6 +17,8 @@ import groqModels from './models/groq.json' with { type: 'json' }
 import martianModels from './models/martian.json' with { type: 'json' }
 import mistralModels from './models/mistral.json' with { type: 'json' }
 import nvidiaModels from './models/nvidia.json' with { type: 'json' }
+import deepseekModels from './models/deepseek.json' with { type: 'json' }
+import googleGeminiModels from './models/google_gemini.json' with { type: 'json' }

 const engines = [
   anthropic,
@@ -25,6 +29,8 @@ const engines = [
   mistral,
   martian,
   nvidia,
+  deepseek,
+  googleGemini,
 ]
 const models = [
   ...anthropicModels,
@@ -35,5 +41,7 @@ const models = [
   ...mistralModels,
   ...martianModels,
   ...nvidiaModels,
+  ...deepseekModels,
+  ...googleGeminiModels,
 ]
 export { engines, models }
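
Registering another remote provider is a data-only change that follows the pattern above: two JSON imports plus two array entries. A hedged sketch for a hypothetical provider (the `acme` names are illustrative, not part of this commit):

// Hypothetical example only; mirrors the deepseek/googleGemini wiring above.
import acme from './resources/acme.json' with { type: 'json' }
import acmeModels from './models/acme.json' with { type: 'json' }

// ...then append to the existing arrays:
//   engines: [..., nvidia, deepseek, googleGemini, acme]
//   models:  [..., ...deepseekModels, ...googleGeminiModels, ...acmeModels]
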
@@ -8,6 +8,7 @@
     "inference_params": {
       "max_tokens": 4096,
       "temperature": 0.7,
+      "max_temperature": 1.0,
       "stream": true
     },
     "engine": "anthropic"
@@ -21,6 +22,7 @@
     "inference_params": {
       "max_tokens": 8192,
       "temperature": 0.7,
+      "max_temperature": 1.0,
       "stream": true
     },
     "engine": "anthropic"
@@ -34,6 +36,7 @@
     "inference_params": {
       "max_tokens": 8192,
       "temperature": 0.7,
+      "max_temperature": 1.0,
       "stream": true
     },
     "engine": "anthropic"
@@ -8,6 +8,7 @@
     "inference_params": {
       "max_tokens": 4096,
       "temperature": 0.7,
+      "max_temperature": 1.0,
       "stream": false
     },
     "engine": "cohere"
@@ -21,6 +22,7 @@
     "inference_params": {
       "max_tokens": 4096,
       "temperature": 0.7,
+      "max_temperature": 1.0,
       "stream": false
     },
     "engine": "cohere"
extensions/engine-management-extension/models/deepseek.json (new file, 28 lines)
@@ -0,0 +1,28 @@
+[
+  {
+    "model": "deepseek-chat",
+    "object": "model",
+    "name": "DeepSeek Chat",
+    "version": "1.0",
+    "description": "The deepseek-chat model has been upgraded to DeepSeek-V3. deepseek-reasoner points to the new model DeepSeek-R1",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "deepseek"
+  },
+  {
+    "model": "deepseek-reasoner",
+    "object": "model",
+    "name": "DeepSeek R1",
+    "version": "1.0",
+    "description": "CoT (Chain of Thought) is the reasoning content deepseek-reasoner gives before output the final answer. For details, please refer to Reasoning Model.",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "deepseek"
+  }
+]
@@ -0,0 +1,67 @@
+[
+  {
+    "model": "gemini-2.0-flash",
+    "object": "model",
+    "name": "Gemini 2.0 Flash",
+    "version": "1.0",
+    "description": "Gemini 2.0 Flash delivers next-gen features and improved capabilities, including superior speed, native tool use, multimodal generation, and a 1M token context window.",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "google_gemini"
+  },
+  {
+    "model": "gemini-2.0-flash-lite-preview",
+    "object": "model",
+    "name": "Gemini 2.0 Flash-Lite Preview",
+    "version": "1.0",
+    "description": "A Gemini 2.0 Flash model optimized for cost efficiency and low latency.",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "google_gemini"
+  },
+  {
+    "model": "gemini-1.5-flash",
+    "object": "model",
+    "name": "Gemini 1.5 Flash",
+    "version": "1.0",
+    "description": "Gemini 1.5 Flash is a fast and versatile multimodal model for scaling across diverse tasks.",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "google_gemini"
+  },
+  {
+    "model": "gemini-1.5-flash-8b",
+    "object": "model",
+    "name": "Gemini 1.5 Flash-8B",
+    "version": "1.0",
+    "description": "Gemini 1.5 Flash-8B is a small model designed for lower intelligence tasks.",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "google_gemini"
+  },
+  {
+    "model": "gemini-1.5-pro",
+    "object": "model",
+    "name": "Gemini 1.5 Pro",
+    "version": "1.0",
+    "description": "Gemini 1.5 Pro is a mid-size multimodal model that is optimized for a wide-range of reasoning tasks. 1.5 Pro can process large amounts of data at once, including 2 hours of video, 19 hours of audio, codebases with 60,000 lines of code, or 2,000 pages of text. ",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "google_gemini"
+  }
+]
@@ -8,6 +8,7 @@
     "inference_params": {
       "max_tokens": 32000,
       "temperature": 0.7,
+      "max_temperature": 1.0,
       "top_p": 0.95,
       "stream": true
     },
@@ -22,6 +23,7 @@
     "inference_params": {
       "max_tokens": 32000,
       "temperature": 0.7,
+      "max_temperature": 1.0,
       "top_p": 0.95,
       "stream": true
     },
@@ -36,6 +38,7 @@
     "inference_params": {
       "max_tokens": 32000,
       "temperature": 0.7,
+      "max_temperature": 1.0,
       "top_p": 0.95,
       "stream": true
     },
@@ -8,6 +8,7 @@
     "inference_params": {
       "max_tokens": 1024,
       "temperature": 0.3,
+      "max_temperature": 1.0,
      "top_p": 1,
       "stream": false,
       "frequency_penalty": 0,
@@ -79,12 +79,7 @@
     "description": "OpenAI o1 is a new model with complex reasoning",
     "format": "api",
     "inference_params": {
-      "max_tokens": 100000,
-      "temperature": 1,
-      "top_p": 1,
-      "stream": true,
-      "frequency_penalty": 0,
-      "presence_penalty": 0
+      "max_tokens": 100000
     },
     "engine": "openai"
   },
@@ -97,11 +92,7 @@
     "format": "api",
     "inference_params": {
       "max_tokens": 32768,
-      "temperature": 1,
-      "top_p": 1,
-      "stream": true,
-      "frequency_penalty": 0,
-      "presence_penalty": 0
+      "stream": true
     },
     "engine": "openai"
   },
@@ -114,11 +105,20 @@
     "format": "api",
     "inference_params": {
       "max_tokens": 65536,
-      "temperature": 1,
-      "top_p": 1,
-      "stream": true,
-      "frequency_penalty": 0,
-      "presence_penalty": 0
+      "stream": true
+    },
+    "engine": "openai"
+  },
+  {
+    "model": "o3-mini",
+    "object": "model",
+    "name": "OpenAI o3-mini",
+    "version": "1.0",
+    "description": "OpenAI most recent reasoning model, providing high intelligence at the same cost and latency targets of o1-mini.",
+    "format": "api",
+    "inference_params": {
+      "max_tokens": 100000,
+      "stream": true
     },
     "engine": "openai"
   }
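
The o1-family entries above drop the temperature, top_p, and penalty defaults, keeping only max_tokens and stream; OpenAI's reasoning (o-series) endpoints reject fixed sampling parameters, so sending fewer keys avoids request errors. An illustrative final payload for one of these models (a sketch, not Jan's actual request code):

// Sketch of the wire payload for a reasoning model after this change; the
// max_tokens value is renamed to max_completion_tokens by the request
// transform updated later in this diff.
const body = {
  model: 'o3-mini',
  messages: [{ role: 'user', content: 'Hello' }],
  max_completion_tokens: 100000,
  stream: true,
}
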
@@ -1,16 +1,91 @@
 [
   {
-    "model": "open-router-auto",
+    "model": "deepseek/deepseek-r1:free",
     "object": "model",
-    "name": "OpenRouter",
+    "name": "DeepSeek: R1",
     "version": "1.0",
-    "description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+    "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
     "inference_params": {
-      "max_tokens": 128000,
       "temperature": 0.7,
       "top_p": 0.95,
       "frequency_penalty": 0,
-      "presence_penalty": 0
+      "presence_penalty": 0,
+      "stream": true
+    },
+    "engine": "openrouter"
+  },
+  {
+    "model": "deepseek/deepseek-r1-distill-llama-70b:free",
+    "object": "model",
+    "name": "DeepSeek: R1 Distill Llama 70B",
+    "version": "1.0",
+    "description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+    "inference_params": {
+      "temperature": 0.7,
+      "top_p": 0.95,
+      "frequency_penalty": 0,
+      "presence_penalty": 0,
+      "stream": true
+    },
+    "engine": "openrouter"
+  },
+  {
+    "model": "deepseek/deepseek-r1-distill-llama-70b:free",
+    "object": "model",
+    "name": "DeepSeek: R1 Distill Llama 70B",
+    "version": "1.0",
+    "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+    "inference_params": {
+      "temperature": 0.7,
+      "top_p": 0.95,
+      "frequency_penalty": 0,
+      "presence_penalty": 0,
+      "stream": true
+    },
+    "engine": "openrouter"
+  },
+  {
+    "model": "meta-llama/llama-3.1-405b-instruct:free",
+    "object": "model",
+    "name": "Meta: Llama 3.1 405B Instruct",
+    "version": "1.0",
+    "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+    "inference_params": {
+      "temperature": 0.7,
+      "top_p": 0.95,
+      "frequency_penalty": 0,
+      "presence_penalty": 0,
+      "stream": true
+    },
+    "engine": "openrouter"
+  },
+  {
+    "model": "qwen/qwen-vl-plus:free",
+    "object": "model",
+    "name": "Qwen: Qwen VL Plus",
+    "version": "1.0",
+    "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+    "inference_params": {
+      "temperature": 0.7,
+      "top_p": 0.95,
+      "frequency_penalty": 0,
+      "presence_penalty": 0,
+      "stream": true
+    },
+    "engine": "openrouter"
+  },
+  {
+    "model": "qwen/qwen2.5-vl-72b-instruct:free",
+    "object": "model",
+    "name": "Qwen: Qwen2.5 VL 72B Instruct",
+    "version": "1.0",
+    "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+    "inference_params": {
+      "temperature": 0.7,
+      "top_p": 0.95,
+      "frequency_penalty": 0,
+      "presence_penalty": 0,
+      "stream": true
     },
     "engine": "openrouter"
   }
@@ -29,12 +29,10 @@
   },
   "dependencies": {
     "@janhq/core": "../../core/package.tgz",
-    "cpu-instructions": "^0.0.13",
     "ky": "^1.7.2",
     "p-queue": "^8.0.1"
   },
   "bundledDependencies": [
-    "cpu-instructions",
     "@janhq/core"
   ],
   "engines": {
@@ -1,5 +1,5 @@
 {
-  "id": "@janhq/inference-anthropic-extension",
+  "id": "anthropic",
   "type": "remote",
   "engine": "anthropic",
   "url": "https://console.anthropic.com/settings/keys",
@@ -10,13 +10,14 @@
     "transform_req": {
       "chat_completions": {
         "url": "https://api.anthropic.com/v1/messages",
-        "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": \"{{ input_request.messages.0.content }}\", \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": \"{{ message.role}}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
+        "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": {{ tojson(input_request.messages.0.content) }}, \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"metadata\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
       }
     },
     "transform_resp": {
       "chat_completions": {
-        "template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": \"{{ input_request.model }}\", \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"{{ input_request.role }}\", \"content\": {% if input_request.content and input_request.content.0.type == \"text\" %} \"{{input_request.content.0.text}}\" {% else %} null {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.stop_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.usage.input_tokens }}, \"completion_tokens\": {{ input_request.usage.output_tokens }}, \"total_tokens\": {{ input_request.usage.input_tokens + input_request.usage.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 }, \"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}"
+        "template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {{tojson(input_request)}} {% endif %}"
-      }
       }
+      },
+      "explore_models_url": "https://docs.anthropic.com/en/docs/about-claude/models"
     }
 }
@@ -1,5 +1,5 @@
 {
-  "id": "@janhq/inference-cohere-extension",
+  "id": "cohere",
   "type": "remote",
   "engine": "cohere",
   "url": "https://dashboard.cohere.com/api-keys",
@@ -10,13 +10,14 @@
     "transform_req": {
       "chat_completions": {
         "url": "https://api.cohere.ai/v1/chat",
-        "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"preamble\": \"{{ input_request.messages.0.content }}\", {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_first and not loop.is_last %} {\"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}], {% endif %} \"message\": \"{{ last(input_request.messages).content }}\" {% else %} {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_last %} { \"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}],{% endif %}\"message\": \"{{ last(input_request.messages).content }}\" {% endif %}{% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} \"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
+        "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"preamble\": {{ tojson(input_request.messages.0.content) }}, {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_first and not loop.is_last %} {\"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}], {% endif %} \"message\": \"{{ last(input_request.messages).content }}\" {% else %} {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_last %} { \"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}],{% endif %}\"message\": \"{{ last(input_request.messages).content }}\" {% endif %}{% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} \"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
       }
     },
     "transform_resp": {
       "chat_completions": {
         "template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.event_type == \"text-generation\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.text }}\" {% else %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.event_type == \"stream-end\" %} \"finish_reason\": \"{{ input_request.finish_reason }}\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.generation_id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": {% if input_request.model %} \"{{ input_request.model }}\" {% else %} \"command-r-plus-08-2024\" {% endif %}, \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"assistant\", \"content\": {% if not input_request.text %} null {% else %} \"{{ input_request.text }}\" {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.finish_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.meta.tokens.input_tokens }}, \"completion_tokens\": {{ input_request.meta.tokens.output_tokens }},\"total_tokens\": {{ input_request.meta.tokens.input_tokens + input_request.meta.tokens.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 },\"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}"
       }
-    }
+    },
+    "explore_models_url": "https://docs.cohere.com/v2/docs/models"
   }
 }
@@ -0,0 +1,23 @@
+{
+  "id": "deepseek",
+  "type": "remote",
+  "engine": "deepseek",
+  "url": "https://platform.deepseek.com/api_keys",
+  "api_key": "",
+  "metadata": {
+    "get_models_url": "https://api.deepseek.com/models",
+    "header_template": "Authorization: Bearer {{api_key}}",
+    "transform_req": {
+      "chat_completions": {
+        "url": "https://api.deepseek.com/chat/completions",
+        "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+      }
+    },
+    "transform_resp": {
+      "chat_completions": {
+        "template": "{{tojson(input_request)}}"
+      }
+    },
+    "explore_models_url": "https://api-docs.deepseek.com/quick_start/pricing"
+  }
+}
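
The new engine manifests are purely declarative: an API-key URL, a models endpoint, a header template, and inja-style request/response templates. As an illustration only (the real rendering happens in the cortex server's template engine, not in extension code), the header template amounts to a single substitution:

// Illustrative rendering of "header_template"; assumes a simple single-key
// substitution, which is all this particular template needs.
const renderHeader = (template: string, apiKey: string): string =>
  template.replace('{{api_key}}', apiKey)

// renderHeader('Authorization: Bearer {{api_key}}', 'sk-123')
// => 'Authorization: Bearer sk-123'
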
@@ -0,0 +1,23 @@
+{
+  "id": "google_gemini",
+  "type": "remote",
+  "engine": "google_gemini",
+  "url": "https://aistudio.google.com/apikey",
+  "api_key": "",
+  "metadata": {
+    "get_models_url": "https://generativelanguage.googleapis.com/v1beta/models",
+    "header_template": "Authorization: Bearer {{api_key}}",
+    "transform_req": {
+      "chat_completions": {
+        "url": "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
+        "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+      }
+    },
+    "transform_resp": {
+      "chat_completions": {
+        "template": "{{tojson(input_request)}}"
+      }
+    },
+    "explore_models_url": "https://ai.google.dev/gemini-api/docs/models/gemini"
+  }
+}
@@ -1,5 +1,5 @@
 {
-  "id": "@janhq/inference-groq-extension",
+  "id": "groq",
   "type": "remote",
   "engine": "groq",
   "url": "https://console.groq.com/keys",
@@ -15,8 +15,9 @@
     },
     "transform_resp": {
       "chat_completions": {
-        "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+        "template": "{{tojson(input_request)}}"
-      }
       }
+      },
+      "explore_models_url": "https://console.groq.com/docs/models"
     }
 }
@@ -1,5 +1,5 @@
 {
-  "id": "@janhq/inference-martian-extension",
+  "id": "martian",
   "type": "remote",
   "engine": "martian",
   "url": "https://withmartian.com/dashboard",
@@ -15,8 +15,9 @@
     },
     "transform_resp": {
       "chat_completions": {
-        "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+        "template": "{{tojson(input_request)}}"
-      }
       }
+      },
+      "explore_models_url": "https://withmartian.github.io/llm-adapters/"
     }
 }
@@ -1,5 +1,5 @@
 {
-  "id": "@janhq/inference-mistral-extension",
+  "id": "mistral",
   "type": "remote",
   "engine": "mistral",
   "url": "https://console.mistral.ai/api-keys/",
@@ -17,6 +17,7 @@
       "chat_completions": {
         "template": "{{tojson(input_request)}}"
       }
-    }
+    },
+    "explore_models_url": "https://docs.mistral.ai/getting-started/models/models_overview/"
   }
 }
@@ -1,5 +1,5 @@
 {
-  "id": "@janhq/inference-nvidia-extension",
+  "id": "nvidia",
   "type": "remote",
   "engine": "nvidia",
   "url": "https://org.ngc.nvidia.com/setup/personal-keys",
@@ -15,8 +15,9 @@
     },
     "transform_resp": {
       "chat_completions": {
-        "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+        "template": "{{tojson(input_request)}}"
-      }
       }
+      },
+      "explore_models_url": "https://build.nvidia.com/models"
     }
 }
@@ -1,5 +1,5 @@
 {
-  "id": "@janhq/inference-openai-extension",
+  "id": "openai",
   "type": "remote",
   "engine": "openai",
   "url": "https://platform.openai.com/account/api-keys",
@@ -10,13 +10,14 @@
     "transform_req": {
       "chat_completions": {
         "url": "https://api.openai.com/v1/chat/completions",
-        "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or ((input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and (key == \"stop\")) %} {% if not first %} , {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %} , {% endif %} {% endif %} {% endfor %}] {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }"
+        "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or ((input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") and (key == \"stop\")) %} {% if not first %} , {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %} , {% endif %} {% endif %} {% endfor %}] {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% set first = false %} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }"
       }
     },
     "transform_resp": {
       "chat_completions": {
-        "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+        "template": "{{tojson(input_request)}}"
-      }
       }
+      },
+      "explore_models_url": "https://platform.openai.com/docs/models"
     }
 }
@@ -1,5 +1,5 @@
 {
-  "id": "@janhq/inference-openrouter-extension",
+  "id": "openrouter",
   "type": "remote",
   "engine": "openrouter",
   "url": "https://openrouter.ai/keys",
@@ -10,13 +10,14 @@
     "transform_req": {
       "chat_completions": {
         "url": "https://openrouter.ai/api/v1/chat/completions",
-        "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+        "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
       }
     },
     "transform_resp": {
       "chat_completions": {
-        "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+        "template": "{{tojson(input_request)}}"
-      }
       }
+      },
+      "explore_models_url": "https://openrouter.ai/models"
     }
 }
@@ -13,9 +13,19 @@ export default defineConfig([
       NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`),
       API_URL: JSON.stringify('http://127.0.0.1:39291'),
       SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
+      PLATFORM: JSON.stringify(process.platform),
       CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'),
       DEFAULT_REMOTE_ENGINES: JSON.stringify(engines),
       DEFAULT_REMOTE_MODELS: JSON.stringify(models),
+      DEFAULT_REQUEST_PAYLOAD_TRANSFORM: JSON.stringify(
+        `{ {% set first = true %} {% for key, value in input_request %} {% if key == "messages" or key == "model" or key == "temperature" or key == "store" or key == "max_tokens" or key == "stream" or key == "presence_penalty" or key == "metadata" or key == "frequency_penalty" or key == "tools" or key == "tool_choice" or key == "logprobs" or key == "top_logprobs" or key == "logit_bias" or key == "n" or key == "modalities" or key == "prediction" or key == "response_format" or key == "service_tier" or key == "seed" or key == "stop" or key == "stream_options" or key == "top_p" or key == "parallel_tool_calls" or key == "user" %} {% if not first %},{% endif %} "{{ key }}": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }`
+      ),
+      DEFAULT_RESPONSE_BODY_TRANSFORM: JSON.stringify(
+        '{{tojson(input_request)}}'
+      ),
+      DEFAULT_REQUEST_HEADERS_TRANSFORM: JSON.stringify(
+        'Authorization: Bearer {{api_key}}'
+      ),
     },
   },
   {
@@ -29,15 +39,4 @@ export default defineConfig([
       CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'),
     },
   },
-  {
-    input: 'src/node/cpuInfo.ts',
-    output: {
-      format: 'cjs',
-      file: 'dist/node/cpuInfo.js',
-    },
-    external: ['cpu-instructions'],
-    resolve: {
-      extensions: ['.ts', '.js', '.svg'],
-    },
-  },
 ])
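
These `define` entries are compile-time constants: the bundler inlines each identifier as a literal, and the `declare const` statements in the next file expose them to TypeScript. A hedged usage sketch (the PLATFORM value shown is just an example):

// Inside the built extension these identifiers are plain literals, e.g.:
const defaultHeader = DEFAULT_REQUEST_HEADERS_TRANSFORM // 'Authorization: Bearer {{api_key}}'
if (PLATFORM === 'darwin') {
  // platform-specific behavior, decided at build time rather than runtime
}
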
@@ -1,7 +1,11 @@
 declare const API_URL: string
 declare const CORTEX_ENGINE_VERSION: string
+declare const PLATFORM: string
 declare const SOCKET_URL: string
 declare const NODE: string
+declare const DEFAULT_REQUEST_PAYLOAD_TRANSFORM: string
+declare const DEFAULT_RESPONSE_BODY_TRANSFORM: string
+declare const DEFAULT_REQUEST_HEADERS_TRANSFORM: string

 declare const DEFAULT_REMOTE_ENGINES: ({
   id: string
@@ -19,12 +19,16 @@ import ky, { HTTPError } from 'ky'
 import PQueue from 'p-queue'
 import { EngineError } from './error'
 import { getJanDataFolderPath } from '@janhq/core'
+import { engineVariant } from './utils'
+
+interface ModelList {
+  data: Model[]
+}
 /**
- * JSONEngineManagementExtension is a EngineManagementExtension implementation that provides
+ * JanEngineManagementExtension is a EngineManagementExtension implementation that provides
  * functionality for managing engines.
  */
-export default class JSONEngineManagementExtension extends EngineManagementExtension {
+export default class JanEngineManagementExtension extends EngineManagementExtension {
   queue = new PQueue({ concurrency: 1 })

   /**
@@ -63,13 +67,12 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
    * @returns A Promise that resolves to an object of list engines.
    */
   async getRemoteModels(name: string): Promise<any> {
-    return this.queue.add(() =>
-      ky
-        .get(`${API_URL}/v1/models/remote/${name}`)
-        .json<Model[]>()
-        .then((e) => e)
-        .catch(() => [])
-    ) as Promise<Model[]>
+    return ky
+      .get(`${API_URL}/v1/models/remote/${name}`)
+      .json<ModelList>()
+      .catch(() => ({
+        data: [],
+      })) as Promise<ModelList>
   }

   /**
@@ -138,9 +141,38 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
    * Add a new remote engine
    * @returns A Promise that resolves to intall of engine.
    */
-  async addRemoteEngine(engineConfig: EngineConfig) {
+  async addRemoteEngine(
+    engineConfig: EngineConfig,
+    persistModels: boolean = true
+  ) {
+    // Populate default settings
+    if (
+      engineConfig.metadata?.transform_req?.chat_completions &&
+      !engineConfig.metadata.transform_req.chat_completions.template
+    )
+      engineConfig.metadata.transform_req.chat_completions.template =
+        DEFAULT_REQUEST_PAYLOAD_TRANSFORM
+
+    if (
+      engineConfig.metadata?.transform_resp?.chat_completions &&
+      !engineConfig.metadata.transform_resp.chat_completions?.template
+    )
+      engineConfig.metadata.transform_resp.chat_completions.template =
+        DEFAULT_RESPONSE_BODY_TRANSFORM
+
+    if (engineConfig.metadata && !engineConfig.metadata?.header_template)
+      engineConfig.metadata.header_template = DEFAULT_REQUEST_HEADERS_TRANSFORM
+
     return this.queue.add(() =>
-      ky.post(`${API_URL}/v1/engines`, { json: engineConfig }).then((e) => e)
+      ky.post(`${API_URL}/v1/engines`, { json: engineConfig }).then((e) => {
+        if (persistModels && engineConfig.metadata?.get_models_url) {
+          // Pull /models from remote models endpoint
+          return this.populateRemoteModels(engineConfig)
+            .then(() => e)
+            .catch(() => e)
+        }
+        return e
+      })
     ) as Promise<{ messages: string }>
   }

@@ -161,9 +193,25 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
    * @param model - Remote model object.
    */
   async addRemoteModel(model: Model) {
-    return this.queue.add(() =>
-      ky.post(`${API_URL}/v1/models/add`, { json: model }).then((e) => e)
+    return this.queue
+      .add(() =>
+        ky
+          .post(`${API_URL}/v1/models/add`, {
+            json: {
+              inference_params: {
+                max_tokens: 4096,
+                temperature: 0.7,
+                top_p: 0.95,
+                stream: true,
+                frequency_penalty: 0,
+                presence_penalty: 0,
+              },
+              ...model,
+            },
+          })
+          .then((e) => e)
       )
+      .then(() => {})
   }

   /**
@@ -245,11 +293,7 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
         error instanceof EngineError
       ) {
         const systemInfo = await systemInformation()
-        const variant = await executeOnMain(
-          NODE,
-          'engineVariant',
-          systemInfo.gpuSetting
-        )
+        const variant = await engineVariant(systemInfo.gpuSetting)
         await this.setDefaultEngineVariant(InferenceEngine.cortex_llamacpp, {
           variant: variant,
           version: `${CORTEX_ENGINE_VERSION}`,
@@ -293,14 +337,40 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
           data.api_key = api_key
           /// END - Migrate legacy api key settings

-          await this.addRemoteEngine(data).catch(console.error)
+          await this.addRemoteEngine(data, false).catch(console.error)
         })
       )
       events.emit(EngineEvent.OnEngineUpdate, {})
-      DEFAULT_REMOTE_MODELS.forEach(async (data: Model) => {
-        await this.addRemoteModel(data).catch(() => {})
-      })
+      await Promise.all(
+        DEFAULT_REMOTE_MODELS.map((data: Model) =>
+          this.addRemoteModel(data).catch(() => {})
+        )
+      )
       events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
     }
   }

+  /**
+   * Pulls models list from the remote provider and persist
+   * @param engineConfig
+   * @returns
+   */
+  private populateRemoteModels = async (engineConfig: EngineConfig) => {
+    return this.getRemoteModels(engineConfig.engine)
+      .then((models: ModelList) => {
+        if (models?.data)
+          Promise.all(
+            models.data.map((model) =>
+              this.addRemoteModel({
+                ...model,
+                engine: engineConfig.engine as InferenceEngine,
+                model: model.model ?? model.id,
+              }).catch(console.info)
+            )
+          ).then(() => {
+            events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
+          })
+      })
+      .catch(console.info)
+  }
 }
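
With the defaults above, callers can now register a minimal engine and let `addRemoteEngine` back-fill the templates and pull models. A hedged sketch of a caller (field names follow the JSON manifests in this diff; the exact `EngineConfig` shape is assumed, and the `acme` values are illustrative):

// Hypothetical caller. Empty chat_completions templates are back-filled from
// the DEFAULT_*_TRANSFORM constants, and because get_models_url is set,
// populateRemoteModels fetches and persists the provider's model list.
await extension.addRemoteEngine({
  engine: 'acme',
  metadata: {
    get_models_url: 'https://api.acme.example/v1/models',
    transform_req: { chat_completions: { url: 'https://api.acme.example/v1/chat/completions' } },
    transform_resp: { chat_completions: {} },
  },
})
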
@@ -1,27 +0,0 @@
-import { cpuInfo } from 'cpu-instructions'
-
-// Check the CPU info and determine the supported instruction set
-const info = cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512')
-  ? 'avx512'
-  : cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX2')
-    ? 'avx2'
-    : cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX')
-      ? 'avx'
-      : 'noavx'
-
-// Send the result and wait for confirmation before exiting
-new Promise<void>((resolve, reject) => {
-  // @ts-ignore
-  process.send(info, (error: Error | null) => {
-    if (error) {
-      reject(error)
-    } else {
-      resolve()
-    }
-  })
-})
-  .then(() => process.exit(0))
-  .catch((error) => {
-    console.error('Failed to send info:', error)
-    process.exit(1)
-  })
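
The deleted helper ran as a forked child process and reported the strongest supported instruction set over IPC; this commit replaces it with the in-process `engineVariant` helper imported from `./utils`. As an illustration only, here is the removed file's fallback order expressed as a pure function (the actual replacement in `./utils` is not shown in this diff and no longer depends on cpu-instructions):

// Sketch of the AVX512 > AVX2 > AVX > noavx selection the removed file
// implemented; flag names match the strings it checked.
const pickInstructionSet = (flags: string[]): string => {
  const upper = flags.map((f) => f.toUpperCase())
  if (upper.includes('AVX512')) return 'avx512'
  if (upper.includes('AVX2')) return 'avx2'
  if (upper.includes('AVX')) return 'avx'
  return 'noavx'
}
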
@@ -1,7 +1,6 @@
 import { describe, expect, it } from '@jest/globals'
 import engine from './index'
-import { GpuSetting } from '@janhq/core/node'
+import { GpuSetting } from '@janhq/core'
-import { cpuInfo } from 'cpu-instructions'
 import { fork } from 'child_process'
 
 let testSettings: GpuSetting = {
@@ -23,22 +22,12 @@ let testSettings: GpuSetting = {
 }
 const originalPlatform = process.platform
 
-jest.mock('cpu-instructions', () => ({
-  cpuInfo: {
-    cpuInfo: jest.fn(),
-  },
-}))
-let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock
-mockCpuInfo.mockReturnValue([])
-
-jest.mock('@janhq/core/node', () => ({
+jest.mock('@janhq/core', () => ({
   appResourcePath: () => '.',
   log: jest.fn(),
 }))
-jest.mock('child_process', () => ({
-  fork: jest.fn(),
-}))
-const mockFork = fork as jest.Mock
 
 describe('test executable cortex file', () => {
   afterAll(function () {
@@ -48,14 +37,7 @@ describe('test executable cortex file', () => {
   })
 
   it('executes on MacOS', () => {
-    const mockProcess = {
-      on: jest.fn((event, callback) => {
-        if (event === 'message') {
-          callback('noavx')
-        }
-      }),
-      send: jest.fn(),
-    }
     Object.defineProperty(process, 'platform', {
      value: 'darwin',
    })
@@ -63,7 +45,7 @@ describe('test executable cortex file', () => {
      value: 'arm64',
    })
 
-    mockFork.mockReturnValue(mockProcess)
    expect(engine.engineVariant(testSettings)).resolves.toEqual('mac-arm64')
  })
 
@@ -83,7 +65,7 @@ describe('test executable cortex file', () => {
      }),
      send: jest.fn(),
    }
-    mockFork.mockReturnValue(mockProcess)
    Object.defineProperty(process, 'arch', {
      value: 'x64',
    })
@@ -107,7 +89,6 @@ describe('test executable cortex file', () => {
      }),
      send: jest.fn(),
    }
-    mockFork.mockReturnValue(mockProcess)
 
    expect(engine.engineVariant()).resolves.toEqual('windows-amd64-avx')
  })
@@ -145,7 +126,6 @@ describe('test executable cortex file', () => {
      }),
      send: jest.fn(),
    }
-    mockFork.mockReturnValue(mockProcess)
 
    expect(engine.engineVariant(settings)).resolves.toEqual(
      'windows-amd64-avx2-cuda-11-7'
@@ -176,26 +156,11 @@ describe('test executable cortex file', () => {
      },
    ],
  }
-    mockFork.mockReturnValue({
-      on: jest.fn((event, callback) => {
-        if (event === 'message') {
-          callback('noavx')
-        }
-      }),
-      send: jest.fn(),
-    })
 
    expect(engine.engineVariant(settings)).resolves.toEqual(
      'windows-amd64-noavx-cuda-12-0'
    )
-    mockFork.mockReturnValue({
-      on: jest.fn((event, callback) => {
-        if (event === 'message') {
-          callback('avx512')
-        }
-      }),
-      send: jest.fn(),
-    })
    expect(engine.engineVariant(settings)).resolves.toEqual(
      'windows-amd64-avx2-cuda-12-0'
    )
@@ -209,14 +174,6 @@ describe('test executable cortex file', () => {
      ...testSettings,
      run_mode: 'cpu',
    }
-    mockFork.mockReturnValue({
-      on: jest.fn((event, callback) => {
-        if (event === 'message') {
-          callback('noavx')
-        }
-      }),
-      send: jest.fn(),
-    })
 
    expect(engine.engineVariant()).resolves.toEqual('linux-amd64-noavx')
  })
@@ -245,16 +202,6 @@ describe('test executable cortex file', () => {
      },
    ],
  }
-
-    mockFork.mockReturnValue({
-      on: jest.fn((event, callback) => {
-        if (event === 'message') {
-          callback('avx512')
-        }
-      }),
-      send: jest.fn(),
-    })
-
    expect(engine.engineVariant(settings)).resolves.toBe(
      'linux-amd64-avx2-cuda-11-7'
    )
@@ -284,14 +231,7 @@ describe('test executable cortex file', () => {
      },
    ],
  }
-    mockFork.mockReturnValue({
-      on: jest.fn((event, callback) => {
-        if (event === 'message') {
-          callback('avx2')
-        }
-      }),
-      send: jest.fn(),
-    })
 
    expect(engine.engineVariant(settings)).resolves.toEqual(
      'linux-amd64-avx2-cuda-12-0'
@@ -310,15 +250,6 @@ describe('test executable cortex file', () => {
 
    const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
    cpuInstructions.forEach((instruction) => {
-      mockFork.mockReturnValue({
-        on: jest.fn((event, callback) => {
-          if (event === 'message') {
-            callback(instruction)
-          }
-        }),
-        send: jest.fn(),
-      })
-
      expect(engine.engineVariant(settings)).resolves.toEqual(
        `linux-amd64-${instruction}`
      )
@@ -335,14 +266,7 @@ describe('test executable cortex file', () => {
    }
    const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
    cpuInstructions.forEach((instruction) => {
-      mockFork.mockReturnValue({
-        on: jest.fn((event, callback) => {
-          if (event === 'message') {
-            callback(instruction)
-          }
-        }),
-        send: jest.fn(),
-      })
      expect(engine.engineVariant(settings)).resolves.toEqual(
        `windows-amd64-${instruction}`
      )
@@ -376,14 +300,7 @@ describe('test executable cortex file', () => {
    }
    const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
    cpuInstructions.forEach((instruction) => {
-      mockFork.mockReturnValue({
-        on: jest.fn((event, callback) => {
-          if (event === 'message') {
-            callback(instruction)
-          }
-        }),
-        send: jest.fn(),
-      })
      expect(engine.engineVariant(settings)).resolves.toEqual(
        `windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
      )
@@ -417,14 +334,7 @@ describe('test executable cortex file', () => {
    ],
  }
    cpuInstructions.forEach((instruction) => {
-      mockFork.mockReturnValue({
-        on: jest.fn((event, callback) => {
-          if (event === 'message') {
-            callback(instruction)
-          }
-        }),
-        send: jest.fn(),
-      })
      expect(engine.engineVariant(settings)).resolves.toEqual(
        `linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
      )
@@ -459,14 +369,7 @@ describe('test executable cortex file', () => {
    ],
  }
    cpuInstructions.forEach((instruction) => {
-      mockFork.mockReturnValue({
-        on: jest.fn((event, callback) => {
-          if (event === 'message') {
-            callback(instruction)
-          }
-        }),
-        send: jest.fn(),
-      })
      expect(engine.engineVariant(settings)).resolves.toEqual(
        `linux-amd64-vulkan`
      )
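
With the `cpu-instructions` fork removed, the updated tests drive `engineVariant` purely through the `GpuSetting` argument. A hypothetical extra case in the same style, assuming the setting shape used by the new `utils.ts` further down in this diff (`cpu.instructions`, `gpus[].activated`, `gpus[].version`, `gpus[].additional_information.driver_version`); the platform segment still depends on the build-time `PLATFORM` define:

```typescript
// Hypothetical test sketch, not part of the PR: on a win32 build
// (PLATFORM === 'win32'), an activated CUDA 12 GPU plus an AVX2-capable
// CPU should resolve to the avx2 CUDA 12 variant.
it('derives the variant from GpuSetting alone', () => {
  const settings = {
    ...testSettings,
    cpu: { arch: 'x64', instructions: ['avx512', 'avx2'] },
    gpus: [
      {
        activated: true,
        version: '12.0',
        additional_information: { driver_version: '535.183' },
      },
    ],
  } as any
  expect(engine.engineVariant(settings)).resolves.toEqual(
    'windows-amd64-avx2-cuda-12-0'
  )
})
```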
@@ -2,111 +2,10 @@ import * as path from 'path'
 import {
   appResourcePath,
   getJanDataFolderPath,
-  GpuSetting,
   log,
 } from '@janhq/core/node'
-import { fork } from 'child_process'
 import { mkdir, readdir, symlink } from 'fs/promises'
 
-/**
- * The GPU runMode that will be set - either 'vulkan', 'cuda', or empty for cpu.
- * @param settings
- * @returns
- */
-const gpuRunMode = (settings?: GpuSetting): string => {
-  if (process.platform === 'darwin')
-    // MacOS now has universal binaries
-    return ''
-
-  if (!settings) return ''
-
-  return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda'
-}
-
-/**
- * The OS & architecture that the current process is running on.
- * @returns win, mac-x64, mac-arm64, or linux
- */
-const os = (): string => {
-  return process.platform === 'win32'
-    ? 'windows-amd64'
-    : process.platform === 'darwin'
-      ? process.arch === 'arm64'
-        ? 'mac-arm64'
-        : 'mac-amd64'
-      : 'linux-amd64'
-}
-
-/**
- * The CUDA version that will be set - either '11-7' or '12-0'.
- * @param settings
- * @returns
- */
-const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
-  const isUsingCuda =
-    settings?.vulkan !== true &&
-    settings?.run_mode === 'gpu' &&
-    !os().includes('mac')
-
-  if (!isUsingCuda) return undefined
-  return settings?.cuda?.version === '11' ? '11-7' : '12-0'
-}
-
-/**
- * The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
- * @returns
- */
-const cpuInstructions = async (): Promise<string> => {
-  if (process.platform === 'darwin') return ''
-
-  const child = fork(path.join(__dirname, './cpuInfo.js')) // Path to the child process file
-
-  return new Promise((resolve, reject) => {
-    child.on('message', (cpuInfo?: string) => {
-      resolve(cpuInfo ?? 'noavx')
-      child.kill() // Kill the child process after receiving the result
-    })
-
-    child.on('error', (err) => {
-      resolve('noavx')
-      child.kill()
-    })
-
-    child.on('exit', (code) => {
-      if (code !== 0) {
-        resolve('noavx')
-        child.kill()
-      }
-    })
-  })
-}
-
-/**
- * Find which variant to run based on the current platform.
- */
-const engineVariant = async (gpuSetting?: GpuSetting): Promise<string> => {
-  const cpuInstruction = await cpuInstructions()
-  log(`[CORTEX]: CPU instruction: ${cpuInstruction}`)
-  let engineVariant = [
-    os(),
-    gpuSetting?.vulkan
-      ? 'vulkan'
-      : gpuRunMode(gpuSetting) !== 'cuda'
-        ? // CPU mode - support all variants
-          cpuInstruction
-        : // GPU mode - packaged CUDA variants of avx2 and noavx
-          cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
-            ? 'avx2'
-            : 'noavx',
-    gpuRunMode(gpuSetting),
-    cudaVersion(gpuSetting),
-  ]
-    .filter((e) => !!e)
-    .join('-')
-
-  log(`[CORTEX]: Engine variant: ${engineVariant}`)
-  return engineVariant
-}
-
 /**
  * Create symlink to each variant for the default bundled version
@@ -148,6 +47,5 @@ const symlinkEngines = async () => {
 }
 
 export default {
-  engineVariant,
   symlinkEngines,
 }
86  extensions/engine-management-extension/src/utils.ts  (new file)
@@ -0,0 +1,86 @@
+import { GpuSetting, log } from '@janhq/core'
+
+/**
+ * The GPU runMode that will be set - either 'vulkan', 'cuda', or empty for cpu.
+ * @param settings
+ * @returns
+ */
+
+const gpuRunMode = (settings?: GpuSetting): string => {
+  return settings.gpus?.some(
+    (gpu) =>
+      gpu.activated === true &&
+      gpu.additional_information &&
+      gpu.additional_information.driver_version
+  )
+    ? 'cuda'
+    : ''
+}
+
+/**
+ * The OS & architecture that the current process is running on.
+ * @returns win, mac-x64, mac-arm64, or linux
+ */
+const os = (settings?: GpuSetting): string => {
+  return PLATFORM === 'win32'
+    ? 'windows-amd64'
+    : PLATFORM === 'darwin'
+      ? settings?.cpu?.arch === 'arm64'
+        ? 'mac-arm64'
+        : 'mac-amd64'
+      : 'linux-amd64'
+}
+
+/**
+ * The CUDA version that will be set - either '11-7' or '12-0'.
+ * @param settings
+ * @returns
+ */
+const cudaVersion = (settings?: GpuSetting): '12-0' | '11-7' | undefined => {
+  const isUsingCuda =
+    settings?.vulkan !== true &&
+    settings?.gpus?.some((gpu) => (gpu.activated === true ? 'gpu' : 'cpu')) &&
+    !os().includes('mac')
+
+  if (!isUsingCuda) return undefined
+  // return settings?.cuda?.version === '11' ? '11-7' : '12-0'
+  return settings.gpus?.some((gpu) => gpu.version.includes('12'))
+    ? '12-0'
+    : '11-7'
+}
+
+/**
+ * The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
+ * @returns
+ */
+
+/**
+ * Find which variant to run based on the current platform.
+ */
+export const engineVariant = async (
+  gpuSetting?: GpuSetting
+): Promise<string> => {
+  const platform = os(gpuSetting)
+
+  // There is no need to append the variant extension for mac
+  if (platform.startsWith('mac')) return platform
+
+  let engineVariant =
+    gpuSetting?.vulkan || gpuSetting.gpus.some((e) => !e.additional_information)
+      ? [platform, 'vulkan']
+      : [
+          platform,
+          gpuRunMode(gpuSetting) === 'cuda' &&
+          (gpuSetting.cpu.instructions.includes('avx2') ||
+            gpuSetting.cpu.instructions.includes('avx512'))
+            ? 'avx2'
+            : 'noavx',
+          gpuRunMode(gpuSetting),
+          cudaVersion(gpuSetting),
+        ].filter(Boolean) // Remove any falsy values
+
+  let engineVariantString = engineVariant.join('-')
+
+  log(`[CORTEX]: Engine variant: ${engineVariantString}`)
+  return engineVariantString
+}
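
The variant string is composed as `<platform>[-<instructions>][-cuda-<version>]`, or `<platform>-vulkan`. Note that `gpuRunMode` and the non-mac path of `engineVariant` dereference `settings.gpus` and `gpuSetting.cpu` without a guard, so callers are expected to pass a fully populated `GpuSetting`. A hypothetical example of the Vulkan fallback, which is taken whenever any GPU entry lacks `additional_information`:

```typescript
import { engineVariant } from './utils'

// Hypothetical input, not part of the PR: a GPU entry without
// additional_information routes selection down the Vulkan branch, so a
// Linux build (PLATFORM !== 'win32' and !== 'darwin') yields
// 'linux-amd64-vulkan'.
const settings = {
  vulkan: false,
  cpu: { arch: 'x64', instructions: ['avx'] },
  gpus: [{ activated: true, version: '', additional_information: undefined }],
} as any

engineVariant(settings).then((variant) => console.log(variant))
```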
5  extensions/hardware-management-extension/jest.config.js  (new file)
@@ -0,0 +1,5 @@
+/** @type {import('ts-jest').JestConfigWithTsJest} */
+module.exports = {
+  preset: 'ts-jest',
+  testEnvironment: 'node',
+}
48  extensions/hardware-management-extension/package.json  (new file)
@@ -0,0 +1,48 @@
+{
+  "name": "@janhq/hardware-management-extension",
+  "productName": "Hardware Management",
+  "version": "1.0.0",
+  "description": "Manages Better Hardware settings.",
+  "main": "dist/index.js",
+  "node": "dist/node/index.cjs.js",
+  "author": "Jan <service@jan.ai>",
+  "license": "MIT",
+  "scripts": {
+    "test": "jest",
+    "build": "rolldown -c rolldown.config.mjs",
+    "codesign:darwin": "../../.github/scripts/auto-sign.sh",
+    "codesign:win32:linux": "echo 'No codesigning required'",
+    "codesign": "run-script-os",
+    "build:publish": "rimraf *.tgz --glob || true && yarn build && yarn codesign && npm pack && cpx *.tgz ../../pre-install"
+  },
+  "exports": {
+    ".": "./dist/index.js",
+    "./main": "./dist/module.js"
+  },
+  "devDependencies": {
+    "cpx": "^1.5.0",
+    "rimraf": "^3.0.2",
+    "rolldown": "^1.0.0-beta.1",
+    "run-script-os": "^1.1.6",
+    "ts-loader": "^9.5.0",
+    "typescript": "^5.3.3"
+  },
+  "dependencies": {
+    "@janhq/core": "../../core/package.tgz",
+    "cpu-instructions": "^0.0.13",
+    "ky": "^1.7.2",
+    "p-queue": "^8.0.1"
+  },
+  "bundledDependencies": [
+    "cpu-instructions",
+    "@janhq/core"
+  ],
+  "hardwares": {
+    "node": ">=18.0.0"
+  },
+  "files": [
+    "dist/*",
+    "package.json",
+    "README.md"
+  ]
+}
17  extensions/hardware-management-extension/rolldown.config.mjs  (new file)
@@ -0,0 +1,17 @@
+import { defineConfig } from 'rolldown'
+import pkgJson from './package.json' with { type: 'json' }
+
+export default defineConfig([
+  {
+    input: 'src/index.ts',
+    output: {
+      format: 'esm',
+      file: 'dist/index.js',
+    },
+    define: {
+      NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`),
+      API_URL: JSON.stringify('http://127.0.0.1:39291'),
+      SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
+    },
+  },
+])
12  extensions/hardware-management-extension/src/@types/global.d.ts  (vendored, new file)
@@ -0,0 +1,12 @@
+declare const API_URL: string
+declare const SOCKET_URL: string
+declare const NODE: string
+
+interface Core {
+  api: APIFunctions
+  events: EventEmitter
+}
+interface Window {
+  core?: Core | undefined
+  electronAPI?: any | undefined
+}
67  extensions/hardware-management-extension/src/index.ts  (new file)
@@ -0,0 +1,67 @@
+import {
+  executeOnMain,
+  HardwareManagementExtension,
+  HardwareInformation,
+} from '@janhq/core'
+import ky from 'ky'
+import PQueue from 'p-queue'
+
+/**
+ * JSONHardwareManagementExtension is a HardwareManagementExtension implementation that provides
+ * functionality for managing engines.
+ */
+export default class JSONHardwareManagementExtension extends HardwareManagementExtension {
+  queue = new PQueue({ concurrency: 1 })
+
+  /**
+   * Called when the extension is loaded.
+   */
+  async onLoad() {
+    // Run Healthcheck
+    this.queue.add(() => this.healthz())
+  }
+
+  /**
+   * Called when the extension is unloaded.
+   */
+  onUnload() {}
+
+  /**
+   * Do health check on cortex.cpp
+   * @returns
+   */
+  async healthz(): Promise<void> {
+    return ky
+      .get(`${API_URL}/healthz`, {
+        retry: { limit: 20, delay: () => 500, methods: ['get'] },
+      })
+      .then(() => {})
+  }
+
+  /**
+   * @returns A Promise that resolves to an object of hardware.
+   */
+  async getHardware(): Promise<HardwareInformation> {
+    return this.queue.add(() =>
+      ky
+        .get(`${API_URL}/v1/hardware`)
+        .json<HardwareInformation>()
+        .then((e) => e)
+    ) as Promise<HardwareInformation>
+  }
+
+  /**
+   * @returns A Promise that resolves to an object of set gpu activate.
+   */
+  async setAvtiveGpu(data: { gpus: number[] }): Promise<{
+    message: string
+    activated_gpus: number[]
+  }> {
+    return this.queue.add(() =>
+      ky.post(`${API_URL}/v1/hardware/activate`, { json: data }).then((e) => e)
+    ) as Promise<{
+      message: string
+      activated_gpus: number[]
+    }>
+  }
+}
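
Under the hood these methods are thin `ky` wrappers over cortex.cpp's hardware API. A standalone sketch of the equivalent raw calls (endpoint paths taken from the class above; the port comes from the `API_URL` define in `rolldown.config.mjs`):

```typescript
import ky from 'ky'

const API_URL = 'http://127.0.0.1:39291' // value of the rolldown API_URL define

// Equivalent of getHardware(): fetch the hardware inventory.
const hardware = await ky.get(`${API_URL}/v1/hardware`).json()
console.log(hardware)

// Equivalent of setAvtiveGpu(): activate GPUs by index
// (body shape taken from the method's parameter above).
const result = await ky
  .post(`${API_URL}/v1/hardware/activate`, { json: { gpus: [0] } })
  .json()
console.log(result)
```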
@@ -8,7 +8,9 @@
     "forceConsistentCasingInFileNames": true,
     "strict": false,
     "skipLibCheck": true,
-    "rootDir": "./src"
+    "rootDir": "./src",
+    "resolveJsonModule": true
   },
-  "include": ["./src"]
+  "include": ["./src"],
+  "exclude": ["src/**/*.test.ts", "rolldown.config.mjs"]
 }
@@ -1 +1 @@
-1.0.9-rc7
+1.0.10
@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-cortex-extension",
   "productName": "Cortex Inference Engine",
-  "version": "1.0.24",
+  "version": "1.0.25",
   "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",
@@ -76,7 +76,7 @@
     },
     {
       "key": "use_mmap",
-      "title": "MMAP",
+      "title": "mmap",
       "description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.",
      "controllerType": "checkbox",
      "controllerProps": {
@@ -0,0 +1,35 @@
+{
+  "sources": [
+    {
+      "filename": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf",
+      "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf"
+    }
+  ],
+  "id": "deepseek-r1-distill-llama-70b",
+  "object": "model",
+  "name": "DeepSeek R1 Distill Llama 70B Q4",
+  "version": "1.0",
+  "description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|User|> {prompt} <|Assistant|>",
+    "llama_model_path": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf",
+    "ngl": 81
+  },
+  "parameters": {
+    "temperature": 0.6,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 131072,
+    "stop": [],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "DeepSeek",
+    "tags": ["70B", "Featured"],
+    "size": 42500000000
+  },
+  "engine": "llama-cpp"
+}
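
This manifest (and the five DeepSeek variants that follow) uses the same `<|User|> {prompt} <|Assistant|>` template. How the `{prompt}` placeholder is filled is not shown in this PR; the obvious substitution looks like this:

```typescript
// Hypothetical helper, not from this PR: fill a model.json prompt_template.
const applyTemplate = (template: string, prompt: string): string =>
  template.replace('{prompt}', prompt)

applyTemplate('<|User|> {prompt} <|Assistant|>', 'Why is the sky blue?')
// => '<|User|> Why is the sky blue? <|Assistant|>'
```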
@@ -0,0 +1,35 @@
+{
+  "sources": [
+    {
+      "filename": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf",
+      "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf"
+    }
+  ],
+  "id": "deepseek-r1-distill-llama-8b",
+  "object": "model",
+  "name": "DeepSeek R1 Distill Llama 8B Q5",
+  "version": "1.0",
+  "description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|User|> {prompt} <|Assistant|>",
+    "llama_model_path": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf",
+    "ngl": 33
+  },
+  "parameters": {
+    "temperature": 0.6,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 131072,
+    "stop": [],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "DeepSeek",
+    "tags": ["8B", "Featured"],
+    "size": 5730000000
+  },
+  "engine": "llama-cpp"
+}
@@ -0,0 +1,35 @@
+{
+  "sources": [
+    {
+      "filename": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf",
+      "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf"
+    }
+  ],
+  "id": "deepseek-r1-distill-qwen-1.5b",
+  "object": "model",
+  "name": "DeepSeek R1 Distill Qwen 1.5B Q5",
+  "version": "1.0",
+  "description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|User|> {prompt} <|Assistant|>",
+    "llama_model_path": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf",
+    "ngl": 29
+  },
+  "parameters": {
+    "temperature": 0.6,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 131072,
+    "stop": [],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "DeepSeek",
+    "tags": ["1.5B", "Featured"],
+    "size": 1290000000
+  },
+  "engine": "llama-cpp"
+}
@@ -0,0 +1,35 @@
+{
+  "sources": [
+    {
+      "filename": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf",
+      "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf"
+    }
+  ],
+  "id": "deepseek-r1-distill-qwen-14b",
+  "object": "model",
+  "name": "DeepSeek R1 Distill Qwen 14B Q4",
+  "version": "1.0",
+  "description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|User|> {prompt} <|Assistant|>",
+    "llama_model_path": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf",
+    "ngl": 49
+  },
+  "parameters": {
+    "temperature": 0.6,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 131072,
+    "stop": [],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "DeepSeek",
+    "tags": ["14B", "Featured"],
+    "size": 8990000000
+  },
+  "engine": "llama-cpp"
+}
@@ -0,0 +1,35 @@
+{
+  "sources": [
+    {
+      "filename": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf",
+      "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf"
+    }
+  ],
+  "id": "deepseek-r1-distill-qwen-32b",
+  "object": "model",
+  "name": "DeepSeek R1 Distill Qwen 32B Q4",
+  "version": "1.0",
+  "description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|User|> {prompt} <|Assistant|>",
+    "llama_model_path": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf",
+    "ngl": 65
+  },
+  "parameters": {
+    "temperature": 0.6,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 131072,
+    "stop": [],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "DeepSeek",
+    "tags": ["32B", "Featured"],
+    "size": 19900000000
+  },
+  "engine": "llama-cpp"
+}
@@ -0,0 +1,35 @@
+{
+  "sources": [
+    {
+      "filename": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf",
+      "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf"
+    }
+  ],
+  "id": "deepseek-r1-distill-qwen-7b",
+  "object": "model",
+  "name": "DeepSeek R1 Distill Qwen 7B Q5",
+  "version": "1.0",
+  "description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|User|> {prompt} <|Assistant|>",
+    "llama_model_path": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf",
+    "ngl": 29
+  },
+  "parameters": {
+    "temperature": 0.6,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 131072,
+    "stop": [],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "DeepSeek",
+    "tags": ["7B", "Featured"],
+    "size": 5440000000
+  },
+  "engine": "llama-cpp"
+}
@@ -22,19 +22,13 @@
     "top_p": 0.95,
     "stream": true,
     "max_tokens": 8192,
-    "stop": [
-      "<|end_of_text|>",
-      "<|eot_id|>",
-      "<|eom_id|>"
-    ],
+    "stop": ["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
     "frequency_penalty": 0,
     "presence_penalty": 0
   },
   "metadata": {
     "author": "MetaAI",
-    "tags": [
-      "8B", "Featured"
-    ],
+    "tags": ["8B", "Featured"],
     "size": 4920000000
   },
   "engine": "llama-cpp"
@@ -49,6 +49,13 @@ import qwen2514bJson from './resources/models/qwen2.5-14b-instruct/model.json' with { type: 'json' }
 import qwen2532bJson from './resources/models/qwen2.5-32b-instruct/model.json' with { type: 'json' }
 import qwen2572bJson from './resources/models/qwen2.5-72b-instruct/model.json' with { type: 'json' }
+
+import deepseekR1DistillQwen_1_5b from './resources/models/deepseek-r1-distill-qwen-1.5b/model.json' with { type: 'json' }
+import deepseekR1DistillQwen_7b from './resources/models/deepseek-r1-distill-qwen-7b/model.json' with { type: 'json' }
+import deepseekR1DistillQwen_14b from './resources/models/deepseek-r1-distill-qwen-14b/model.json' with { type: 'json' }
+import deepseekR1DistillQwen_32b from './resources/models/deepseek-r1-distill-qwen-32b/model.json' with { type: 'json' }
+import deepseekR1DistillLlama_8b from './resources/models/deepseek-r1-distill-llama-8b/model.json' with { type: 'json' }
+import deepseekR1DistillLlama_70b from './resources/models/deepseek-r1-distill-llama-70b/model.json' with { type: 'json' }
 
 export default defineConfig([
   {
     input: 'src/index.ts',
@@ -106,6 +113,12 @@ export default defineConfig([
       qwen2514bJson,
       qwen2532bJson,
       qwen2572bJson,
+      deepseekR1DistillQwen_1_5b,
+      deepseekR1DistillQwen_7b,
+      deepseekR1DistillQwen_14b,
+      deepseekR1DistillQwen_32b,
+      deepseekR1DistillLlama_8b,
+      deepseekR1DistillLlama_70b,
     ]),
     NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
     SETTINGS: JSON.stringify(defaultSettingJson),
@@ -112,8 +112,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
     if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number
 
     // Run the process watchdog
-    const systemInfo = await systemInformation()
-    this.queue.add(() => executeOnMain(NODE, 'run', systemInfo))
+    // const systemInfo = await systemInformation()
+    this.queue.add(() => executeOnMain(NODE, 'run'))
     this.queue.add(() => this.healthz())
     this.subscribeToEvents()
 
@@ -16,15 +16,20 @@ let watchdog: ProcessWatchdog | undefined = undefined
  * Spawns a Nitro subprocess.
  * @returns A promise that resolves when the Nitro subprocess is started.
  */
-function run(systemInfo?: SystemInformation): Promise<any> {
+function run(): Promise<any> {
   log(`[CORTEX]:: Spawning cortex subprocess...`)
 
   return new Promise<void>(async (resolve, reject) => {
-    let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? ''
+    // let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? ''
-    let binaryName = `cortex-server${process.platform === 'win32' ? '.exe' : ''}`
+    let binaryName = `cortex-server${
+      process.platform === 'win32' ? '.exe' : ''
+    }`
     const binPath = path.join(__dirname, '..', 'bin')
 
     const executablePath = path.join(binPath, binaryName)
 
+    addEnvPaths(binPath)
+
     const sharedPath = path.join(appResourcePath(), 'shared')
     // Execute the binary
     log(`[CORTEX]:: Spawn cortex at path: ${executablePath}`)
@@ -44,15 +49,17 @@ function run(systemInfo?: SystemInformation): Promise<any> {
         `${path.join(dataFolderPath, '.janrc')}`,
         '--data_folder_path',
         dataFolderPath,
+        '--loglevel',
+        'INFO',
       ],
       {
        env: {
          ...process.env,
-          CUDA_VISIBLE_DEVICES: gpuVisibleDevices,
+          // CUDA_VISIBLE_DEVICES: gpuVisibleDevices,
-          // Vulkan - Support 1 device at a time for now
+          // // Vulkan - Support 1 device at a time for now
-          ...(gpuVisibleDevices?.length > 0 && {
+          // ...(gpuVisibleDevices?.length > 0 && {
-            GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices,
+          //   GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices,
-          }),
+          // }),
        },
        cwd: sharedPath,
      }
@@ -71,6 +78,22 @@ function dispose() {
   watchdog?.terminate()
 }
+
+/**
+ * Set the environment paths for the cortex subprocess
+ * @param dest
+ */
+function addEnvPaths(dest: string) {
+  // Add engine path to the PATH and LD_LIBRARY_PATH
+  if (process.platform === 'win32') {
+    process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
+  } else {
+    process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
+      path.delimiter,
+      dest
+    )
+  }
+}
+
 /**
  * Cortex process info
  */
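
The new `addEnvPaths` helper exists because the dynamic loader consults different variables per platform: Windows resolves DLLs through `PATH`, while Linux resolves shared objects through `LD_LIBRARY_PATH`. Since the spawned `cortex-server` inherits `process.env`, appending the bin directory before the spawn lets it find its bundled engine libraries. A hypothetical sanity check of the resulting environment:

```typescript
import path from 'path'

// Hypothetical helper, not part of the PR: verify a directory is on the
// loader search path that addEnvPaths() appends to before spawning.
const isOnLoaderPath = (dir: string): boolean => {
  const key = process.platform === 'win32' ? 'PATH' : 'LD_LIBRARY_PATH'
  return (process.env[key] || '').split(path.delimiter).includes(dir)
}

console.log(isOnLoaderPath(path.join(__dirname, '..', 'bin')))
```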
@@ -15,8 +15,6 @@ import {
 } from '@janhq/core'
 import { CortexAPI } from './cortex'
 import { scanModelsFolder } from './legacy/model-json'
-import { downloadModel } from './legacy/download'
-import { systemInformation } from '@janhq/core'
 import { deleteModelFiles } from './legacy/delete'
 
 export enum Settings {
@@ -71,18 +69,6 @@ export default class JanModelExtension extends ModelExtension {
    * @returns A Promise that resolves when the model is downloaded.
    */
   async pullModel(model: string, id?: string, name?: string): Promise<void> {
-    if (id) {
-      const model: Model = ModelManager.instance().get(id)
-      // Clip vision model - should not be handled by cortex.cpp
-      // TensorRT model - should not be handled by cortex.cpp
-      if (
-        model &&
-        (model.engine === InferenceEngine.nitro_tensorrt_llm ||
-          model.settings.vision_model)
-      ) {
-        return downloadModel(model, (await systemInformation()).gpuSetting)
-      }
-    }
     /**
      * Sending POST to /models/pull/{id} endpoint to pull the model
      */
@@ -2,15 +2,12 @@ import {
   downloadFile,
   DownloadRequest,
   fs,
-  GpuSetting,
-  InferenceEngine,
   joinPath,
   Model,
 } from '@janhq/core'
 
 export const downloadModel = async (
   model: Model,
-  gpuSettings?: GpuSetting,
   network?: { ignoreSSL?: boolean; proxy?: string }
 ): Promise<void> => {
   const homedir = 'file://models'
@@ -27,41 +24,6 @@ export const downloadModel = async (
     JSON.stringify(model, null, 2)
   )
-
-  if (model.engine === InferenceEngine.nitro_tensorrt_llm) {
-    if (!gpuSettings || gpuSettings.gpus.length === 0) {
-      console.error('No GPU found. Please check your GPU setting.')
-      return
-    }
-    const firstGpu = gpuSettings.gpus[0]
-    if (!firstGpu.name.toLowerCase().includes('nvidia')) {
-      console.error('No Nvidia GPU found. Please check your GPU setting.')
-      return
-    }
-    const gpuArch = firstGpu.arch
-    if (gpuArch === undefined) {
-      console.error('No GPU architecture found. Please check your GPU setting.')
-      return
-    }
-
-    if (!supportedGpuArch.includes(gpuArch)) {
-      console.debug(
-        `Your GPU: ${JSON.stringify(firstGpu)} is not supported. Only 30xx, 40xx series are supported.`
-      )
-      return
-    }
-
-    const os = 'windows' // TODO: remove this hard coded value
-
-    const newSources = model.sources.map((source) => {
-      const newSource = { ...source }
-      newSource.url = newSource.url
-        .replace(/<os>/g, os)
-        .replace(/<gpuarch>/g, gpuArch)
-      return newSource
-    })
-    model.sources = newSources
-  }
-
   console.debug(`Download sources: ${JSON.stringify(model.sources)}`)
 
   if (model.sources.length > 1) {
@@ -1,75 +0,0 @@
-# Create a Jan Extension using Typescript
-
-Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
-
-## Create Your Own Extension
-
-To create your own extension, you can use this repository as a template! Just follow the below instructions:
-
-1. Click the Use this template button at the top of the repository
-2. Select Create a new repository
-3. Select an owner and name for your new repository
-4. Click Create repository
-5. Clone your new repository
-
-## Initial Setup
-
-After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
-
-> [!NOTE]
->
-> You'll need to have a reasonably modern version of
-> [Node.js](https://nodejs.org) handy. If you are using a version manager like
-> [`nodenv`](https://github.com/nodenv/nodenv) or
-> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
-> root of your repository to install the version specified in
-> [`package.json`](./package.json). Otherwise, 20.x or later should work!
-
-1. :hammer_and_wrench: Install the dependencies
-
-   ```bash
-   npm install
-   ```
-
-1. :building_construction: Package the TypeScript for distribution
-
-   ```bash
-   npm run bundle
-   ```
-
-1. :white_check_mark: Check your artifact
-
-   There will be a tgz file in your extension directory now
-
-## Update the Extension Metadata
-
-The [`package.json`](package.json) file defines metadata about your extension, such as
-extension name, main entry, description and version.
-
-When you copy this repository, update `package.json` with the name, description for your extension.
-
-## Update the Extension Code
-
-The [`src/`](./src/) directory is the heart of your extension! This contains the
-source code that will be run when your extension functions are invoked. You can replace the
-contents of this directory with your own code.
-
-There are a few things to keep in mind when writing your extension code:
-
-- Most Jan Extension functions are processed asynchronously.
-  In `index.ts`, you will see that the extension function will return a `Promise<any>`.
-
-  ```typescript
-  import { events, MessageEvent, MessageRequest } from '@janhq/core'
-
-  function onStart(): Promise<any> {
-    return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
-      this.inference(data)
-    )
-  }
-  ```
-
-For more information about the Jan Extension Core module, see the
-[documentation](https://github.com/janhq/jan/blob/main/core/README.md).
-
-So, what are you waiting for? Go ahead and start customizing your extension!
@@ -1,2 +0,0 @@
-@echo off
-.\node_modules\.bin\download https://catalog.jan.ai/vulkaninfoSDK.exe -o ./bin
@@ -1,49 +0,0 @@
-{
-  "name": "@janhq/monitoring-extension",
-  "productName": "System Monitoring",
-  "version": "1.0.10",
-  "description": "Provides system health and OS level data.",
-  "main": "dist/index.js",
-  "node": "dist/node/index.cjs.js",
-  "author": "Jan <service@jan.ai>",
-  "license": "AGPL-3.0",
-  "scripts": {
-    "build": "rolldown -c rolldown.config.mjs && yarn download-artifacts",
-    "download-artifacts": "run-script-os && cpx \"bin/**\" \"dist/bin\"",
-    "download-artifacts:darwin": "echo 'No artifacts to download for darwin'",
-    "download-artifacts:win32": "download.bat",
-    "download-artifacts:linux": "download https://catalog.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo",
-    "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
-  },
-  "exports": {
-    ".": "./dist/index.js",
-    "./main": "./dist/node/index.cjs.js"
-  },
-  "devDependencies": {
-    "@types/node": "^20.11.4",
-    "@types/node-os-utils": "^1.3.4",
-    "cpx": "^1.5.0",
-    "download-cli": "^1.1.1",
-    "rimraf": "^3.0.2",
-    "rolldown": "1.0.0-beta.1",
-    "run-script-os": "^1.1.6",
-    "typescript": "^5.3.3"
-  },
-  "dependencies": {
-    "@janhq/core": "../../core/package.tgz",
-    "node-os-utils": "^1.3.7"
-  },
-  "files": [
-    "dist/*",
-    "package.json",
-    "README.md"
-  ],
-  "bundleDependencies": [
-    "node-os-utils",
-    "@janhq/core"
-  ],
-  "installConfig": {
-    "hoistingLimits": "workspaces"
-  },
-  "packageManager": "yarn@4.5.3"
-}
@@ -1,22 +0,0 @@
-[
-  {
-    "key": "log-enabled",
-    "title": "Enable App Logs",
-    "description": "Saves app logs locally on your computer. This enables you to send us crash reports.",
-    "controllerType": "checkbox",
-    "controllerProps": {
-      "value": true
-    }
-  },
-  {
-    "key": "log-cleaning-interval",
-    "title": "Log Cleaning Interval",
-    "description": "Automatically delete local logs after a certain time interval (in milliseconds).",
-    "controllerType": "input",
-    "controllerProps": {
-      "value": "120000",
-      "placeholder": "Interval in milliseconds. E.g. 120000",
-      "textAlign": "right"
-    }
-  }
-]
@@ -1,32 +0,0 @@
-import { defineConfig } from 'rolldown'
-import packageJson from './package.json' with { type: 'json' }
-import settingJson from './resources/settings.json' with { type: 'json' }
-
-export default defineConfig([
-  {
-    input: 'src/index.ts',
-    output: {
-      format: 'esm',
-      file: 'dist/index.js',
-    },
-    platform: 'browser',
-    define: {
-      NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
-      SETTINGS: JSON.stringify(settingJson),
-    },
-  },
-  {
-    input: 'src/node/index.ts',
-    external: ['@janhq/core/node'],
-    output: {
-      format: 'cjs',
-      file: 'dist/node/index.cjs.js',
-      sourcemap: false,
-      inlineDynamicImports: true,
-    },
-    resolve: {
-      extensions: ['.js', '.ts', '.json'],
-    },
-    platform: 'node',
-  },
-])
@@ -1,19 +0,0 @@
-declare const NODE: string
-declare const SETTINGS: SettingComponentProps[]
-
-type CpuGpuInfo = {
-  cpu: {
-    usage: number
-  }
-  gpu: GpuInfo[]
-}
-
-type GpuInfo = {
-  id: string
-  name: string
-  temperature: string
-  utilization: string
-  memoryTotal: string
-  memoryFree: string
-  memoryUtilization: string
-}
@@ -1,90 +0,0 @@
-import {
-  AppConfigurationEventName,
-  GpuSetting,
-  MonitoringExtension,
-  OperatingSystemInfo,
-  events,
-  executeOnMain,
-} from '@janhq/core'
-
-enum Settings {
-  logEnabled = 'log-enabled',
-  logCleaningInterval = 'log-cleaning-interval',
-}
-/**
- * JanMonitoringExtension is a extension that provides system monitoring functionality.
- * It implements the MonitoringExtension interface from the @janhq/core package.
- */
-export default class JanMonitoringExtension extends MonitoringExtension {
-  /**
-   * Called when the extension is loaded.
-   */
-  async onLoad() {
-    // Register extension settings
-    this.registerSettings(SETTINGS)
-
-    const logEnabled = await this.getSetting<boolean>(Settings.logEnabled, true)
-    const logCleaningInterval = parseInt(
-      await this.getSetting<string>(Settings.logCleaningInterval, '120000')
-    )
-    // Register File Logger provided by this extension
-    await executeOnMain(NODE, 'registerLogger', {
-      logEnabled,
-      logCleaningInterval: isNaN(logCleaningInterval)
-        ? 120000
-        : logCleaningInterval,
-    })
-
-    // Attempt to fetch nvidia info
-    await executeOnMain(NODE, 'updateNvidiaInfo')
-    events.emit(AppConfigurationEventName.OnConfigurationUpdate, {})
-  }
-
-  onSettingUpdate<T>(key: string, value: T): void {
-    if (key === Settings.logEnabled) {
-      executeOnMain(NODE, 'updateLogger', { logEnabled: value })
-    } else if (key === Settings.logCleaningInterval) {
-      executeOnMain(NODE, 'updateLogger', { logCleaningInterval: value })
-    }
-  }
-
-  /**
-   * Called when the extension is unloaded.
-   */
-  onUnload(): void {
-    // Register File Logger provided by this extension
-    executeOnMain(NODE, 'unregisterLogger')
-  }
-
-  /**
-   * Returns the GPU configuration.
-   * @returns A Promise that resolves to an object containing the GPU configuration.
-   */
-  async getGpuSetting(): Promise<GpuSetting | undefined> {
-    return executeOnMain(NODE, 'getGpuConfig')
-  }
-
-  /**
-   * Returns information about the system resources.
-   * @returns A Promise that resolves to an object containing information about the system resources.
-   */
-  getResourcesInfo(): Promise<any> {
-    return executeOnMain(NODE, 'getResourcesInfo')
-  }
-
-  /**
-   * Returns information about the current system load.
-   * @returns A Promise that resolves to an object containing information about the current system load.
-   */
-  getCurrentLoad(): Promise<any> {
-    return executeOnMain(NODE, 'getCurrentLoad')
-  }
-
-  /**
-   * Returns information about the OS
-   * @returns
-   */
-  getOsInfo(): Promise<OperatingSystemInfo> {
-    return executeOnMain(NODE, 'getOsInfo')
-  }
-}
@@ -1,389 +0,0 @@
-import {
-  GpuSetting,
-  GpuSettingInfo,
-  LoggerManager,
-  OperatingSystemInfo,
-  ResourceInfo,
-  SupportedPlatforms,
-  getJanDataFolderPath,
-  log,
-} from '@janhq/core/node'
-import { mem, cpu } from 'node-os-utils'
-import { exec } from 'child_process'
-import { writeFileSync, existsSync, readFileSync, mkdirSync } from 'fs'
-import path from 'path'
-import os from 'os'
-import { FileLogger } from './logger'
-
-/**
- * Path to the settings directory
- **/
-export const SETTINGS_DIR = path.join(getJanDataFolderPath(), 'settings')
-/**
- * Path to the settings file
- **/
-export const GPU_INFO_FILE = path.join(SETTINGS_DIR, 'settings.json')
-
-/**
- * Default GPU settings
- * TODO: This needs to be refactored to support multiple accelerators
- **/
-const DEFAULT_SETTINGS: GpuSetting = {
-  notify: true,
-  run_mode: 'cpu',
-  nvidia_driver: {
-    exist: false,
-    version: '',
-  },
-  cuda: {
-    exist: false,
-    version: '',
-  },
-  gpus: [],
-  gpu_highest_vram: '',
-  gpus_in_use: [],
-  is_initial: true,
-  // TODO: This needs to be set based on user toggle in settings
-  vulkan: false,
-}
-
-export const getGpuConfig = async (): Promise<GpuSetting | undefined> => {
-  if (process.platform === 'darwin') return undefined
-  if (existsSync(GPU_INFO_FILE))
-    return JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
-  return DEFAULT_SETTINGS
-}
-
-export const getResourcesInfo = async (): Promise<ResourceInfo> => {
-  const ramUsedInfo = await mem.used()
-  const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024
-  const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024
-
-  const resourceInfo: ResourceInfo = {
-    mem: {
-      totalMemory,
-      usedMemory,
-    },
-  }
-
-  return resourceInfo
-}
-
-export const getCurrentLoad = () =>
-  new Promise<CpuGpuInfo>(async (resolve, reject) => {
-    const cpuPercentage = await cpu.usage()
-    let data = {
-      run_mode: 'cpu',
-      gpus_in_use: [],
-    }
-
-    if (process.platform !== 'darwin') {
-      data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
-    }
-
-    if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
-      const gpuIds = data.gpus_in_use.join(',')
-      if (gpuIds !== '' && data['vulkan'] !== true) {
-        exec(
-          `nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
-          (error, stdout, _) => {
-            if (error) {
-              console.error(`exec error: ${error}`)
-              throw new Error(error.message)
-            }
-            const gpuInfo: GpuInfo[] = stdout
-              .trim()
-              .split('\n')
-              .map((line) => {
-                const [
-                  id,
-                  name,
-                  temperature,
-                  utilization,
-                  memoryTotal,
-                  memoryFree,
-                  memoryUtilization,
-                ] = line.split(', ').map((item) => item.replace(/\r/g, ''))
-                return {
-                  id,
-                  name,
-                  temperature,
-                  utilization,
-                  memoryTotal,
-                  memoryFree,
-                  memoryUtilization,
-                }
-              })
-
-            resolve({
-              cpu: { usage: cpuPercentage },
-              gpu: gpuInfo,
-            })
-          }
-        )
-      } else {
-        // Handle the case where gpuIds is empty
-        resolve({
-          cpu: { usage: cpuPercentage },
-          gpu: [],
-        })
-      }
-    } else {
-      // Handle the case where run_mode is not 'gpu' or no GPUs are in use
-      resolve({
-        cpu: { usage: cpuPercentage },
-        gpu: [],
-      })
-    }
-  })
-
-/**
- * This will retrieve GPU information and persist settings.json
- * Will be called when the extension is loaded to turn on GPU acceleration if supported
- */
-export const updateNvidiaInfo = async () => {
-  // ignore if macos
-  if (process.platform === 'darwin') return
-
-  try {
-    JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
-  } catch (error) {
-    if (!existsSync(SETTINGS_DIR)) {
-      mkdirSync(SETTINGS_DIR, {
-        recursive: true,
-      })
-    }
-    writeFileSync(GPU_INFO_FILE, JSON.stringify(DEFAULT_SETTINGS, null, 2))
-  }
-
-  await updateNvidiaDriverInfo()
-  await updateGpuInfo()
-}
-
-const updateNvidiaDriverInfo = async () =>
-  new Promise((resolve, reject) => {
-    exec(
-      'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
-      (error, stdout) => {
-        const data: GpuSetting = JSON.parse(
-          readFileSync(GPU_INFO_FILE, 'utf-8')
-        )
-
-        if (!error) {
-          const firstLine = stdout.split('\n')[0].trim()
-          data.nvidia_driver.exist = true
-          data.nvidia_driver.version = firstLine
-        } else {
-          data.nvidia_driver.exist = false
-        }
-
-        writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
-        resolve({})
-      }
-    )
-  })
-
-const getGpuArch = (gpuName: string): string => {
-  if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown'
-
-  if (gpuName.includes('30')) return 'ampere'
-  else if (gpuName.includes('40')) return 'ada'
-  else return 'unknown'
-}
-
-const updateGpuInfo = async () =>
-  new Promise((resolve, reject) => {
-    let data: GpuSetting = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
-
-    // Cuda
-    if (data.vulkan === true) {
-      // Vulkan
-      exec(
-        process.platform === 'win32'
-          ? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
-          : `${__dirname}/../bin/vulkaninfo --summary`,
-        async (error, stdout) => {
-          if (!error) {
-            const output = stdout.toString()
-
-            log(output)
-            const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g
-
-            const gpus: GpuSettingInfo[] = []
-            let match
-            while ((match = gpuRegex.exec(output)) !== null) {
-              const id = match[1]
-              const name = match[2]
-              const arch = getGpuArch(name)
-              gpus.push({ id, vram: '0', name, arch })
-            }
-            data.gpus = gpus
-
-            if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
-              data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
-            }
-
-            data = await updateCudaExistence(data)
-            writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
-            log(`[APP]::${JSON.stringify(data)}`)
-            resolve({})
-          } else {
-            reject(error)
-          }
-        }
-      )
-    } else {
-      exec(
-        'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
-        async (error, stdout) => {
-          if (!error) {
-            log(`[SPECS]::${stdout}`)
-            // Get GPU info and gpu has higher memory first
-            let highestVram = 0
-            let highestVramId = '0'
-            const gpus: GpuSettingInfo[] = stdout
-              .trim()
-              .split('\n')
-              .map((line) => {
-                let [id, vram, name] = line.split(', ')
-                const arch = getGpuArch(name)
-                vram = vram.replace(/\r/g, '')
-                if (parseFloat(vram) > highestVram) {
-                  highestVram = parseFloat(vram)
-                  highestVramId = id
-                }
-                return { id, vram, name, arch }
-              })
-
-            data.gpus = gpus
-            data.gpu_highest_vram = highestVramId
-          } else {
-            data.gpus = []
-            data.gpu_highest_vram = undefined
-          }
-
-          if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
-            data.gpus_in_use = data.gpu_highest_vram ? [data.gpu_highest_vram].filter(e => !!e) : []
-          }
-
-          data = await updateCudaExistence(data)
-          console.log('[MONITORING]::Cuda info: ', data)
-          writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
-          log(`[APP]::${JSON.stringify(data)}`)
-          resolve({})
-        }
-      )
-    }
-  })
-
-/**
- * Check if file exists in paths
- */
-const checkFileExistenceInPaths = (file: string, paths: string[]): boolean => {
-  return paths.some((p) => existsSync(path.join(p, file)))
-}
-
-/**
- * Validate cuda for linux and windows
- */
-const updateCudaExistence = async (
-  data: GpuSetting = DEFAULT_SETTINGS
-): Promise<GpuSetting> => {
-  let filesCuda12: string[]
-  let filesCuda11: string[]
-  let paths: string[]
-  let cudaVersion: string = ''
-
-  if (process.platform === 'win32') {
-    filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']
-    filesCuda11 = ['cublas64_11.dll', 'cudart64_110.dll', 'cublasLt64_11.dll']
-    paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []
-  } else {
-    filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']
-    filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']
-    paths = process.env.LD_LIBRARY_PATH
-      ? process.env.LD_LIBRARY_PATH.split(path.delimiter)
-      : []
-    paths.push('/usr/lib/x86_64-linux-gnu/')
-  }
-
-  let cudaExists = filesCuda12.every(
-    (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
-  )
-
-  if (!cudaExists) {
-    cudaExists = filesCuda11.every(
-      (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
-    )
-    if (cudaExists) {
-      cudaVersion = '11'
-    }
-  } else {
-    cudaVersion = '12'
-  }
-
-  data.cuda.exist = cudaExists
-  data.cuda.version = cudaVersion
-
-  console.debug(data.is_initial, data.gpus_in_use)
-
-  if (cudaExists && data.is_initial && data.gpus_in_use.length > 0) {
-    data.run_mode = 'gpu'
-  }
-
-  data.is_initial = false
-
-  // Attempt to query CUDA using NVIDIA SMI
-  if (!cudaExists) {
-    await new Promise<void>((resolve) => {
-      exec('nvidia-smi', (error, stdout) => {
-        if (!error) {
-          const regex = /CUDA\s*Version:\s*(\d+\.\d+)/g
-          const match = regex.exec(stdout)
-          if (match && match[1]) {
-            data.cuda.version = match[1]
-          }
-        }
-        console.log('[MONITORING]::Finalized cuda info update: ', data)
-        resolve()
-      })
-    })
-  }
-  return data
-}
-
-export const getOsInfo = (): OperatingSystemInfo => {
-  const platform =
-    SupportedPlatforms.find((p) => p === process.platform) || 'unknown'
-
-  const osInfo: OperatingSystemInfo = {
-    platform: platform,
-    arch: process.arch,
-    release: os.release(),
-    machine: os.machine(),
-    version: os.version(),
-    totalMem: os.totalmem(),
-    freeMem: os.freemem(),
-  }
-
-  return osInfo
-}
-
-export const registerLogger = ({ logEnabled, logCleaningInterval }) => {
-  const logger = new FileLogger(logEnabled, logCleaningInterval)
-  LoggerManager.instance().register(logger)
-  logger.cleanLogs()
-}
-
-export const unregisterLogger = () => {
-  LoggerManager.instance().unregister('file')
-}
-
-export const updateLogger = ({ logEnabled, logCleaningInterval }) => {
-  const logger = LoggerManager.instance().loggers.get('file') as FileLogger
-  if (logger && logEnabled !== undefined) logger.logEnabled = logEnabled
-  if (logger && logCleaningInterval)
-    logger.logCleaningInterval = logCleaningInterval
-  // Rerun
-  logger && logger.cleanLogs()
-}
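Note on the removed module above: its GPU polling reduces to shelling out to nvidia-smi and splitting the CSV it prints. A minimal standalone TypeScript sketch of that parsing step, with an illustrative sample line rather than real device output:

    // Sketch of the CSV parsing the removed getCurrentLoad relied on.
    // Real input comes from:
    //   nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits
    const sampleLine = '0, NVIDIA GeForce RTX 3090, 41, 7, 24576, 23010, 2' // illustrative values
    const [id, name, temperature, utilization, memoryTotal, memoryFree, memoryUtilization] =
      sampleLine.split(', ').map((item) => item.replace(/\r/g, ''))
    // Every field stays a string, exactly as in the original code.
    console.log({ id, name, temperature, utilization, memoryTotal, memoryFree, memoryUtilization })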
@@ -5,77 +5,470 @@
     "post": {
       "operationId": "AssistantsController_create",
       "summary": "Create assistant",
-      "description": "Creates a new assistant.",
-      "parameters": [],
+      "description": "Creates a new assistant with the specified configuration.",
       "requestBody": {
         "required": true,
         "content": {
           "application/json": {
             "schema": {
-              "$ref": "#/components/schemas/CreateAssistantDto"
+              "type": "object",
+              "properties": {
+                "model": {
+                  "type": "string",
+                  "description": "The model identifier to use for the assistant."
+                },
+                "name": {
+                  "type": "string",
+                  "description": "The name of the assistant."
+                },
+                "description": {
+                  "type": "string",
+                  "description": "The description of the assistant."
+                },
+                "instructions": {
+                  "type": "string",
+                  "description": "Instructions for the assistant's behavior."
+                },
+                "tools": {
+                  "type": "array",
+                  "description": "A list of tools enabled on the assistant. Maximum of 128 tools.",
+                  "items": {
+                    "type": "object",
+                    "properties": {
+                      "type": {
+                        "type": "string",
+                        "enum": [
+                          "code_interpreter",
+                          "file_search",
+                          "function"
+                        ]
+                      }
+                    }
+                  }
+                },
+                "tool_resources": {
+                  "type": "object",
+                  "description": "Resources used by the assistant's tools.",
+                  "properties": {
+                    "code_interpreter": {
+                      "type": "object"
+                    },
+                    "file_search": {
+                      "type": "object"
+                    }
+                  }
+                },
+                "metadata": {
+                  "type": "object",
+                  "description": "Set of key-value pairs for the assistant.",
+                  "additionalProperties": true
+                },
+                "temperature": {
+                  "type": "number",
+                  "format": "float",
+                  "description": "Temperature parameter for response generation."
+                },
+                "top_p": {
+                  "type": "number",
+                  "format": "float",
+                  "description": "Top p parameter for response generation."
+                },
+                "response_format": {
+                  "oneOf": [
+                    {
+                      "type": "string",
+                      "enum": ["auto"]
+                    },
+                    {
+                      "type": "object"
+                    }
+                  ]
+                }
+              },
+              "required": ["model"]
             }
           }
         }
       },
-      "responses": {
-        "201": {
-          "description": "The assistant has been successfully created."
-        }
-      },
-      "tags": ["Assistants"]
-    },
-    "get": {
-      "operationId": "AssistantsController_findAll",
-      "summary": "List assistants",
-      "description": "Returns a list of assistants.",
-      "parameters": [
-        {
-          "name": "limit",
-          "required": false,
-          "in": "query",
-          "description": "A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.",
-          "schema": {
-            "type": "number"
-          }
-        },
-        {
-          "name": "order",
-          "required": false,
-          "in": "query",
-          "description": "Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order.",
-          "schema": {
-            "type": "string"
-          }
-        },
-        {
-          "name": "after",
-          "required": false,
-          "in": "query",
-          "description": "A cursor for use in pagination. after is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.",
-          "schema": {
-            "type": "string"
-          }
-        },
-        {
-          "name": "before",
-          "required": false,
-          "in": "query",
-          "description": "A cursor for use in pagination. before is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list.",
-          "schema": {
-            "type": "string"
-          }
-        }
-      ],
-      "responses": {
-        "200": {
-          "description": "Ok",
-          "content": {
-            "application/json": {
-              "schema": {
-                "type": "array",
-                "items": {
-                  "$ref": "#/components/schemas/AssistantEntity"
-                }
-              }
-            }
-          }
-        }
-      }
-    }
+      "responses": {
+        "200": {
+          "description": "Ok",
+          "content": {
+            "application/json": {
+              "schema": {
+                "type": "object",
+                "properties": {
+                  "id": {
+                    "type": "string",
+                    "description": "The unique identifier of the assistant."
+                  },
+                  "object": {
+                    "type": "string",
+                    "enum": ["assistant"],
+                    "description": "The object type, which is always 'assistant'."
+                  },
+                  "created_at": {
+                    "type": "integer",
+                    "description": "Unix timestamp (in seconds) of when the assistant was created."
+                  },
+                  "model": {
+                    "type": "string",
+                    "description": "The model identifier used by the assistant."
+                  },
+                  "name": {
+                    "type": "string",
+                    "description": "The name of the assistant."
+                  },
+                  "description": {
+                    "type": "string",
+                    "description": "The description of the assistant."
+                  },
+                  "instructions": {
+                    "type": "string",
+                    "description": "Instructions for the assistant's behavior."
+                  },
+                  "tools": {
+                    "type": "array",
+                    "description": "A list of tools enabled on the assistant.",
+                    "items": {
+                      "type": "object",
+                      "properties": {
+                        "type": {
+                          "type": "string",
+                          "enum": [
+                            "code_interpreter",
+                            "file_search",
+                            "function"
+                          ]
+                        }
+                      }
+                    }
+                  },
+                  "tool_resources": {
+                    "type": "object",
+                    "description": "Resources used by the assistant's tools.",
+                    "properties": {
+                      "code_interpreter": {
+                        "type": "object"
+                      },
+                      "file_search": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "metadata": {
+                    "type": "object",
+                    "description": "Set of key-value pairs that can be attached to the assistant.",
+                    "additionalProperties": true
+                  },
+                  "temperature": {
+                    "type": "number",
+                    "format": "float",
+                    "description": "Temperature parameter for response generation."
+                  },
+                  "top_p": {
+                    "type": "number",
+                    "format": "float",
+                    "description": "Top p parameter for response generation."
+                  },
+                  "response_format": {
+                    "oneOf": [
+                      {
+                        "type": "string",
+                        "enum": ["auto"]
+                      },
+                      {
+                        "type": "object"
+                      }
+                    ]
+                  }
+                },
+                "required": [
+                  "id",
+                  "object",
+                  "created_at",
+                  "model",
+                  "metadata"
+                ]
+              }
+            }
+          }
+        }
+      },
+      "tags": ["Assistants"]
+    },
+    "patch": {
+      "operationId": "AssistantsController_update",
+      "summary": "Update assistant",
+      "description": "Updates an assistant. Requires at least one modifiable field.",
+      "parameters": [
+        {
+          "name": "id",
+          "required": true,
+          "in": "path",
+          "description": "The unique identifier of the assistant.",
+          "schema": {
+            "type": "string"
+          }
+        },
+        {
+          "name": "OpenAI-Beta",
+          "required": true,
+          "in": "header",
+          "description": "Beta feature header.",
+          "schema": {
+            "type": "string",
+            "enum": ["assistants=v2"]
+          }
+        }
+      ],
+      "requestBody": {
+        "required": true,
+        "content": {
+          "application/json": {
+            "schema": {
+              "type": "object",
+              "properties": {
+                "model": {
+                  "type": "string",
+                  "description": "The model identifier to use for the assistant."
+                },
+                "name": {
+                  "type": "string",
+                  "description": "The name of the assistant."
+                },
+                "description": {
+                  "type": "string",
+                  "description": "The description of the assistant."
+                },
+                "instructions": {
+                  "type": "string",
+                  "description": "Instructions for the assistant's behavior."
+                },
+                "tools": {
+                  "type": "array",
+                  "description": "A list of tools enabled on the assistant. Maximum of 128 tools.",
+                  "items": {
+                    "type": "object",
+                    "properties": {
+                      "type": {
+                        "type": "string",
+                        "enum": [
+                          "code_interpreter",
+                          "file_search",
+                          "function"
+                        ]
+                      }
+                    }
+                  }
+                },
+                "tool_resources": {
+                  "type": "object",
+                  "description": "Resources used by the assistant's tools.",
+                  "properties": {
+                    "code_interpreter": {
+                      "type": "object"
+                    },
+                    "file_search": {
+                      "type": "object"
+                    }
+                  }
+                },
+                "metadata": {
+                  "type": "object",
+                  "description": "Set of key-value pairs for the assistant.",
+                  "additionalProperties": true
+                },
+                "temperature": {
+                  "type": "number",
+                  "format": "float",
+                  "description": "Temperature parameter for response generation."
+                },
+                "top_p": {
+                  "type": "number",
+                  "format": "float",
+                  "description": "Top p parameter for response generation."
+                },
+                "response_format": {
+                  "oneOf": [
+                    {
+                      "type": "string",
+                      "enum": ["auto"]
+                    },
+                    {
+                      "type": "object"
+                    }
+                  ]
+                }
+              },
+              "minProperties": 1
+            }
+          }
+        }
+      },
+      "responses": {
+        "200": {
+          "description": "Ok",
+          "content": {
+            "application/json": {
+              "schema": {
+                "type": "object",
+                "properties": {
+                  "id": {
+                    "type": "string",
+                    "description": "The unique identifier of the assistant."
+                  },
+                  "object": {
+                    "type": "string",
+                    "enum": ["assistant"],
+                    "description": "The object type, which is always 'assistant'."
+                  },
+                  "created_at": {
+                    "type": "integer",
+                    "description": "Unix timestamp (in seconds) of when the assistant was created."
+                  },
+                  "model": {
+                    "type": "string",
+                    "description": "The model identifier used by the assistant."
+                  },
+                  "name": {
+                    "type": "string",
+                    "description": "The name of the assistant."
+                  },
+                  "description": {
+                    "type": "string",
+                    "description": "The description of the assistant."
+                  },
+                  "instructions": {
+                    "type": "string",
+                    "description": "Instructions for the assistant's behavior."
+                  },
+                  "tools": {
+                    "type": "array",
+                    "description": "A list of tools enabled on the assistant.",
+                    "items": {
+                      "type": "object",
+                      "properties": {
+                        "type": {
+                          "type": "string",
+                          "enum": [
+                            "code_interpreter",
+                            "file_search",
+                            "function"
+                          ]
+                        }
+                      }
+                    }
+                  },
+                  "tool_resources": {
+                    "type": "object",
+                    "description": "Resources used by the assistant's tools.",
+                    "properties": {
+                      "code_interpreter": {
+                        "type": "object"
+                      },
+                      "file_search": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "metadata": {
+                    "type": "object",
+                    "description": "Set of key-value pairs that can be attached to the assistant.",
+                    "additionalProperties": true
+                  },
+                  "temperature": {
+                    "type": "number",
+                    "format": "float",
+                    "description": "Temperature parameter for response generation."
+                  },
+                  "top_p": {
+                    "type": "number",
+                    "format": "float",
+                    "description": "Top p parameter for response generation."
+                  },
+                  "response_format": {
+                    "oneOf": [
+                      {
+                        "type": "string",
+                        "enum": ["auto"]
+                      },
+                      {
+                        "type": "object"
+                      }
+                    ]
+                  }
+                },
+                "required": [
+                  "id",
+                  "object",
+                  "created_at",
+                  "model",
+                  "metadata"
+                ]
+              }
+            }
+          }
+        }
+      },
+      "tags": ["Assistants"]
+    },
+    "get": {
+      "operationId": "AssistantsController_list",
+      "summary": "List assistants",
+      "description": "Returns a list of assistants.",
+      "responses": {
+        "200": {
+          "description": "Ok",
+          "content": {
+            "application/json": {
+              "schema": {
+                "type": "object",
+                "properties": {
+                  "object": {
+                    "type": "string",
+                    "enum": ["list"],
+                    "description": "The object type, which is always 'list' for a list response."
+                  },
+                  "data": {
+                    "type": "array",
+                    "items": {
+                      "type": "object",
+                      "properties": {
+                        "id": {
+                          "type": "string",
+                          "description": "The unique identifier of the assistant."
+                        },
+                        "object": {
+                          "type": "string",
+                          "enum": ["assistant"],
+                          "description": "The object type, which is always 'assistant'."
+                        },
+                        "created_at": {
+                          "type": "integer",
+                          "description": "Unix timestamp (in seconds) of when the assistant was created."
+                        },
+                        "model": {
+                          "type": "string",
+                          "description": "The model identifier used by the assistant."
+                        },
+                        "metadata": {
+                          "type": "object",
+                          "description": "Set of key-value pairs that can be attached to the assistant.",
+                          "additionalProperties": true
+                        }
+                      },
+                      "required": [
+                        "id",
+                        "object",
+                        "created_at",
+                        "model",
+                        "metadata"
+                      ]
+                    }
+                  }
+                },
+                "required": ["object", "data"]
               }
             }
           }
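The create-assistant schema above is now inlined instead of referenced via CreateAssistantDto, and only model is required. A request sketch in TypeScript, assuming a local Jan API server at http://localhost:1337 and an illustrative model id (both are assumptions, not part of this spec):

    // Sketch: create an assistant with the fields the inline schema defines.
    const res = await fetch('http://localhost:1337/v1/assistants', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: 'llama3.2-3b-instruct', // assumed model id, for illustration only
        name: 'Docs helper',
        instructions: 'Answer questions about the project documentation.',
        temperature: 0.7,
        top_p: 0.95,
        metadata: { team: 'docs' },
      }),
    })
    const assistant = await res.json()
    console.log(assistant.id, assistant.object) // per the response schema, object is 'assistant'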
@@ -88,7 +481,77 @@
     "get": {
       "operationId": "AssistantsController_findOne",
       "summary": "Get assistant",
-      "description": "Retrieves a specific assistant defined by an assistant's `id`.",
+      "description": "Retrieves a specific assistant by ID.",
+      "parameters": [
+        {
+          "name": "id",
+          "required": true,
+          "in": "path",
+          "description": "The unique identifier of the assistant.",
+          "schema": {
+            "type": "string"
+          }
+        },
+        {
+          "name": "OpenAI-Beta",
+          "required": true,
+          "in": "header",
+          "description": "Beta feature header.",
+          "schema": {
+            "type": "string",
+            "enum": ["assistants=v2"]
+          }
+        }
+      ],
+      "responses": {
+        "200": {
+          "description": "Ok",
+          "content": {
+            "application/json": {
+              "schema": {
+                "type": "object",
+                "properties": {
+                  "id": {
+                    "type": "string",
+                    "description": "The unique identifier of the assistant."
+                  },
+                  "object": {
+                    "type": "string",
+                    "enum": ["assistant"],
+                    "description": "The object type, which is always 'assistant'."
+                  },
+                  "created_at": {
+                    "type": "integer",
+                    "description": "Unix timestamp (in seconds) of when the assistant was created."
+                  },
+                  "model": {
+                    "type": "string",
+                    "description": "The model identifier used by the assistant."
+                  },
+                  "metadata": {
+                    "type": "object",
+                    "description": "Set of key-value pairs attached to the assistant.",
+                    "additionalProperties": true
+                  }
+                },
+                "required": [
+                  "id",
+                  "object",
+                  "created_at",
+                  "model",
+                  "metadata"
+                ]
+              }
+            }
+          }
+        }
+      },
+      "tags": ["Assistants"]
+    },
+    "delete": {
+      "operationId": "AssistantsController_remove",
+      "summary": "Delete assistant",
+      "description": "Deletes a specific assistant by ID.",
       "parameters": [
         {
           "name": "id",
@@ -106,36 +569,24 @@
       "content": {
         "application/json": {
           "schema": {
-            "$ref": "#/components/schemas/AssistantEntity"
-          }
-        }
-      },
-      "tags": ["Assistants"]
-    },
-    "delete": {
-      "operationId": "AssistantsController_remove",
-      "summary": "Delete assistant",
-      "description": "Deletes a specific assistant defined by an assistant's `id`.",
-      "parameters": [
-        {
-          "name": "id",
-          "required": true,
-          "in": "path",
-          "description": "The unique identifier of the assistant.",
-          "schema": {
-            "type": "string"
-          }
-        }
-      ],
-      "responses": {
-        "200": {
-          "description": "The assistant has been successfully deleted.",
-          "content": {
-            "application/json": {
-              "schema": {
-                "$ref": "#/components/schemas/DeleteAssistantResponseDto"
-              }
-            }
-          }
-        }
-      }
+            "type": "object",
+            "properties": {
+              "id": {
+                "type": "string",
+                "description": "The unique identifier of the deleted assistant."
+              },
+              "object": {
+                "type": "string",
+                "enum": ["assistant.deleted"],
+                "description": "The object type for a deleted assistant."
+              },
+              "deleted": {
+                "type": "boolean",
+                "enum": [true],
+                "description": "Indicates the assistant was successfully deleted."
+              }
+            },
+            "required": ["id", "object", "deleted"]
+          }
+        }
+      }
     }
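A companion sketch for the inlined delete response, under the same assumed base URL; the assistant id here is illustrative:

    // Sketch: delete an assistant and check the new inline response shape.
    const res = await fetch('http://localhost:1337/v1/assistants/asst_123', {
      method: 'DELETE',
    })
    const body: { id: string; object: 'assistant.deleted'; deleted: true } = await res.json()
    if (body.deleted) console.log(`Assistant ${body.id} removed`)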
@@ -2199,6 +2650,84 @@
         "tags": ["Engines"]
       }
     },
+    "/engines/{name}/releases/{version}": {
+      "get": {
+        "summary": "List variants for a specific engine version",
+        "description": "Lists all available variants (builds) for a specific version of an engine. Variants can include different CPU architectures (AVX, AVX2, AVX512), GPU support (CUDA, Vulkan), and operating systems (Windows, Linux, macOS).",
+        "parameters": [
+          {
+            "name": "name",
+            "in": "path",
+            "required": true,
+            "schema": {
+              "type": "string",
+              "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"],
+              "default": "llama-cpp"
+            },
+            "description": "The type of engine"
+          },
+          {
+            "name": "version",
+            "in": "path",
+            "required": true,
+            "schema": {
+              "type": "string"
+            },
+            "description": "The version of the engine"
+          },
+          {
+            "name": "show",
+            "in": "query",
+            "required": false,
+            "schema": {
+              "type": "string",
+              "enum": ["all", "compatible"],
+              "default": "all"
+            },
+            "description": "Filter the variants list. Use 'compatible' to show only variants compatible with the current system, or 'all' to show all available variants."
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "Successfully retrieved variants list",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "type": "array",
+                  "items": {
+                    "type": "object",
+                    "properties": {
+                      "name": {
+                        "type": "string",
+                        "description": "The name of the variant, including OS, architecture, and capabilities",
+                        "example": "linux-amd64-avx-cuda-11-7"
+                      },
+                      "created_at": {
+                        "type": "string",
+                        "format": "date-time",
+                        "description": "Creation timestamp of the variant",
+                        "example": "2024-11-13T04:51:16Z"
+                      },
+                      "size": {
+                        "type": "integer",
+                        "description": "Size of the variant in bytes",
+                        "example": 151224604
+                      },
+                      "download_count": {
+                        "type": "integer",
+                        "description": "Number of times this variant has been downloaded",
+                        "example": 0
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+        },
+        "tags": ["Engines"]
+      }
+    },
     "/engines/{name}/releases/latest": {
       "get": {
         "summary": "Get latest release",
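A sketch against the new variants endpoint; the engine name comes from the spec's enum, while the version string and base URL are assumptions for illustration:

    // Sketch: list only the variants compatible with the current system.
    const url = 'http://localhost:1337/v1/engines/llama-cpp/releases/v0.1.40?show=compatible' // version is hypothetical
    const variants: Array<{ name: string; created_at: string; size: number; download_count: number }> =
      await (await fetch(url)).json()
    for (const v of variants) {
      // e.g. "linux-amd64-avx-cuda-11-7, 144 MB" for the spec's example entry
      console.log(v.name, `${(v.size / 1024 / 1024).toFixed(0)} MB`)
    }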
@@ -2314,7 +2843,7 @@
         "get_models_url": {
           "type": "string",
           "description": "The URL to get models",
-          "example": "https://api.openai.com/v1/models"
+          "example": "https://api.openai.com/models"
         }
       }
     }
@@ -3378,6 +3907,7 @@
     "Files",
     "Hardware",
     "Events",
+    "Assistants",
     "Threads",
     "Messages",
     "Pulling Models",
@@ -4858,8 +5388,8 @@
         "engine",
         "version",
         "inference_params",
-        "TransformReq",
-        "TransformResp",
+        "transform_req",
+        "transform_resp",
         "metadata"
       ],
       "properties": {
@@ -4867,9 +5397,9 @@
           "type": "string",
           "description": "The identifier of the model."
         },
-        "api_key_template": {
+        "header_template": {
           "type": "string",
-          "description": "Template for the API key header."
+          "description": "Template for the header."
         },
         "engine": {
           "type": "string",
@@ -4902,7 +5432,7 @@
         }
       }
     },
-    "TransformReq": {
+    "transform_req": {
       "type": "object",
       "properties": {
         "get_models": {
@@ -4924,7 +5454,7 @@
         }
       }
     },
-    "TransformResp": {
+    "transform_resp": {
       "type": "object",
       "properties": {
         "chat_completions": {
@@ -5632,9 +6162,9 @@
           "description": "Number of GPU layers.",
           "example": 33
         },
-        "api_key_template": {
+        "header_template": {
           "type": "string",
-          "description": "Template for the API key header."
+          "description": "Template for the header."
        },
         "version": {
           "type": "string",
@@ -10,7 +10,9 @@ const AutoLink = ({ text }: Props) => {
 
   return (
     <>
-      {text.split(delimiter).map((word) => {
+      {text &&
+        typeof text === 'string' &&
+        text.split(delimiter).map((word) => {
           const match = word.match(delimiter)
           if (match) {
             const url = match[0]
@@ -23,7 +23,13 @@ import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
 import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom'
 import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom'
 
-const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
+const ErrorMessage = ({
+  message,
+  errorComponent,
+}: {
+  message?: ThreadMessage
+  errorComponent?: React.ReactNode
+}) => {
   const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom)
   const setMainState = useSetAtom(mainViewStateAtom)
   const setSelectedSettingScreen = useSetAtom(selectedSettingAtom)
@@ -50,7 +56,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
   const getErrorTitle = () => {
     const engine = getEngine()
 
-    switch (message.metadata?.error_code) {
+    switch (message?.metadata?.error_code) {
       case ErrorCode.InvalidApiKey:
       case ErrorCode.AuthenticationError:
         return (
@@ -61,7 +67,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
             className="font-medium text-[hsla(var(--app-link))] underline"
             onClick={() => {
               setMainState(MainViewState.Settings)
-              engine?.name && setSelectedSettingScreen(engine.name)
+              setSelectedSettingScreen(activeAssistant?.model?.engine ?? '')
             }}
           >
             Settings
@@ -77,7 +83,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
           data-testid="passthrough-error-message"
           className="first-letter:uppercase"
         >
-          {message.content[0]?.text?.value === 'Failed to fetch' &&
+          {message?.content[0]?.text?.value === 'Failed to fetch' &&
           engine &&
           engine?.name !== InferenceEngine.cortex_llamacpp ? (
             <span>
@@ -89,6 +95,9 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
               {message?.content[0]?.text?.value && (
                 <AutoLink text={message?.content[0]?.text?.value} />
               )}
+              {!message?.content[0]?.text?.value && (
+                <span>Something went wrong. Please try again.</span>
+              )}
             </>
           )}
         </p>
@@ -100,12 +109,15 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
     <div className="mx-auto my-6 max-w-[700px] px-4">
       <div
         className="mx-auto max-w-[400px] rounded-lg border border-[hsla(var(--app-border))]"
-        key={message.id}
+        key={message?.id}
       >
         <div className="flex justify-between border-b border-inherit px-4 py-2">
-          <h6 className="text-[hsla(var(--destructive-bg))]">Error</h6>
-          <div className="flex gap-x-4 text-xs">
-            <div>
+          <h6 className="flex items-center gap-x-1 font-semibold text-[hsla(var(--destructive-bg))]">
+            <span className="h-2 w-2 rounded-full bg-[hsla(var(--destructive-bg))]" />
+            <span>Error</span>
+          </h6>
+          <div className="flex items-center gap-x-4 text-xs">
+            <div className="font-semibold">
             <span
               className="flex cursor-pointer items-center gap-x-1 text-[hsla(var(--app-link))]"
               onClick={() => setModalTroubleShooting(true)}
@@ -116,7 +128,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
               <ModalTroubleShooting />
             </div>
             <div
-              className="flex cursor-pointer items-center gap-x-1 text-[hsla(var(--text-secondary))]"
+              className="flex cursor-pointer items-center gap-x-1 font-semibold text-[hsla(var(--text-secondary))]"
               onClick={handleCopy}
             >
               {copied ? (
@@ -138,10 +150,10 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
         </div>
         <div className="max-h-[80px] w-full overflow-x-auto p-4 py-2">
           <div
-            className="text-xs leading-relaxed text-[hsla(var(--text-secondary))]"
+            className="font-serif text-xs leading-relaxed text-[hsla(var(--text-secondary))]"
             ref={errorDivRef}
           >
-            {getErrorTitle()}
+            {errorComponent ? errorComponent : getErrorTitle()}
           </div>
         </div>
       </div>
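With message now optional and the new errorComponent escape hatch, callers can replace the derived error title with their own node. A hypothetical usage sketch:

    // Hypothetical usage; when errorComponent is omitted the component falls
    // back to getErrorTitle(), and to a generic message when content is empty.
    <ErrorMessage
      message={threadMessage}
      errorComponent={<span>Model failed to load. Check the app log.</span>}
    />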
@@ -87,7 +87,7 @@ describe('SystemMonitor', () => {
 
     expect(screen.getByText('Running Models')).toBeInTheDocument()
     expect(screen.getByText('App Log')).toBeInTheDocument()
-    expect(screen.getByText('7.45/14.90 GB')).toBeInTheDocument()
+    expect(screen.getByText('7.45GB / 14.90GB')).toBeInTheDocument()
     expect(screen.getByText('30%')).toBeInTheDocument()
   })
 
@@ -134,8 +134,8 @@ const SystemMonitor = () => {
             <div className="flex items-center justify-between gap-2">
               <h6 className="font-bold">Memory</h6>
               <span>
-                {toGigabytes(usedRam, { hideUnit: true })}/
-                {toGigabytes(totalRam, { hideUnit: true })} GB
+                {toGigabytes(usedRam, { hideUnit: true })}GB /{' '}
+                {toGigabytes(totalRam, { hideUnit: true })}GB
               </span>
             </div>
             <div className="flex items-center gap-x-4">
@@ -149,10 +149,12 @@ const SystemMonitor = () => {
             </div>
             {gpus.length > 0 && (
               <div className="mb-4 border-b border-[hsla(var(--app-border))] pb-4 last:border-none">
-                {gpus.map((gpu, index) => {
+                {gpus
+                  .filter((gpu) => gpu.activated === true)
+                  .map((gpu, index) => {
                     const gpuUtilization = utilizedMemory(
-                      gpu.memoryFree,
-                      gpu.memoryTotal
+                      gpu.free_vram,
+                      gpu.total_vram
                     )
                     return (
                       <div key={index} className="mt-4 flex flex-col gap-x-2">
@@ -163,8 +165,8 @@ const SystemMonitor = () => {
                         <div className="flex gap-x-2">
                           <div className="">
                             <span>
-                              {gpu.memoryTotal - gpu.memoryFree}/
-                              {gpu.memoryTotal}
+                              {gpu.total_vram - gpu.free_vram}/
+                              {gpu.total_vram}
                             </span>
                             <span> MB</span>
                           </div>
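The renamed free_vram and total_vram fields are reported in MB, and utilizedMemory itself is not shown in this diff; assuming it returns used VRAM as a percentage, the arithmetic would be:

    // Assumption: utilizedMemory derives a used-VRAM percentage.
    const utilizedMemory = (freeVram: number, totalVram: number) =>
      Math.round(((totalVram - freeVram) / totalVram) * 100)
    // Illustrative numbers: 24576 MB total, 23010 MB free -> 6
    console.log(utilizedMemory(23010, 24576))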
@@ -25,6 +25,8 @@ import ImportModelOptionModal from '@/screens/Settings/ImportModelOptionModal'
 import ImportingModelModal from '@/screens/Settings/ImportingModelModal'
 import SelectingModelModal from '@/screens/Settings/SelectingModelModal'
 
+import { getAppDistinctId, updateDistinctId } from '@/utils/settings'
+
 import LoadingModal from '../LoadingModal'
 
 import MainViewContainer from '../MainViewContainer'
@@ -96,8 +98,16 @@ const BaseLayout = () => {
           return properties
         },
       })
+      // Attempt to restore distinct Id from app global settings
+      getAppDistinctId()
+        .then((id) => {
+          if (id) posthog.identify(id)
+        })
+        .finally(() => {
           posthog.opt_in_capturing()
           posthog.register({ app_version: VERSION })
+          updateDistinctId(posthog.get_distinct_id())
+        })
     } else {
       posthog.opt_out_capturing()
     }
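getAppDistinctId and updateDistinctId are imported from @/utils/settings but their bodies are not shown in this diff. A rough sketch of what such helpers could look like, assuming they persist the PostHog distinct id in a JSON settings file; this is a guess at the shape, not the actual implementation:

    // Hypothetical sketch only; the real helpers may differ.
    import { promises as fs } from 'fs'

    const SETTINGS_FILE = 'settings.json' // assumed path inside the app data folder

    export const getAppDistinctId = async (): Promise<string | undefined> => {
      try {
        const raw = await fs.readFile(SETTINGS_FILE, 'utf-8')
        return (JSON.parse(raw) as { distinct_id?: string }).distinct_id
      } catch {
        return undefined // first run: nothing persisted yet
      }
    }

    export const updateDistinctId = async (id: string): Promise<void> => {
      await fs.writeFile(SETTINGS_FILE, JSON.stringify({ distinct_id: id }, null, 2))
    }

Whatever the implementation, the .then/.finally chain above guarantees opt-in and registration run whether or not a stored id was found.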
@@ -28,6 +28,8 @@ import ModelLabel from '@/containers/ModelLabel'
 
 import SetupRemoteModel from '@/containers/SetupRemoteModel'
 
+import { useActiveModel } from '@/hooks/useActiveModel'
+
 import { useCreateNewThread } from '@/hooks/useCreateNewThread'
 import useDownloadModel from '@/hooks/useDownloadModel'
 import { modelDownloadStateAtom } from '@/hooks/useDownloadState'
@@ -40,7 +42,7 @@ import useUpdateModelParameters from '@/hooks/useUpdateModelParameters'
 import { formatDownloadPercentage, toGigabytes } from '@/utils/converter'
 
 import { manualRecommendationModel } from '@/utils/model'
-import { getLogoEngine } from '@/utils/modelEngine'
+import { getLogoEngine, getTitleByEngine } from '@/utils/modelEngine'
 
 import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom'
 import {
@@ -93,6 +95,7 @@ const ModelDropdown = ({
   const { updateModelParameter } = useUpdateModelParameters()
   const searchInputRef = useRef<HTMLInputElement>(null)
   const configuredModels = useAtomValue(configuredModelsAtom)
+  const { stopModel } = useActiveModel()
 
   const featuredModels = configuredModels.filter(
     (x) =>
@@ -226,6 +229,7 @@ const ModelDropdown = ({
       const model = downloadedModels.find((m) => m.id === modelId)
       setSelectedModel(model)
       setOpen(false)
+      stopModel()
 
       if (activeThread) {
         // Change assistand tools based on model support RAG
@@ -248,18 +252,13 @@ const ModelDropdown = ({
           ],
         })
 
-      const defaultContextLength = Math.min(
-        8192,
-        model?.settings.ctx_len ?? 8192
-      )
+      const contextLength = model?.settings.ctx_len
+        ? Math.min(8192, model?.settings.ctx_len ?? 8192)
+        : undefined
 
       const overriddenParameters = {
-        ctx_len: model?.settings.ctx_len ? defaultContextLength : undefined,
-        max_tokens: defaultContextLength
-          ? Math.min(
-              model?.parameters.max_tokens ?? 8192,
-              defaultContextLength
-            )
+        ctx_len: contextLength,
+        max_tokens: contextLength
+          ? Math.min(model?.parameters.max_tokens ?? 8192, contextLength)
           : model?.parameters.max_tokens,
       }
 
@@ -289,6 +288,7 @@ const ModelDropdown = ({
       updateThreadMetadata,
       setThreadModelParams,
      updateModelParameter,
+      stopModel,
     ]
   )
 
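The reworked override clamps context length to at most 8192 only when the model declares ctx_len, and caps max_tokens by the same bound. A worked TypeScript sketch of the two cases, with illustrative values:

    // Worked example of the new clamping logic.
    const clamp = (ctxLen?: number, maxTokens?: number) => {
      const contextLength = ctxLen ? Math.min(8192, ctxLen) : undefined
      return {
        ctx_len: contextLength,
        max_tokens: contextLength
          ? Math.min(maxTokens ?? 8192, contextLength)
          : maxTokens,
      }
    }
    console.log(clamp(32768, 32768)) // { ctx_len: 8192, max_tokens: 8192 }
    console.log(clamp(undefined, 2048)) // { ctx_len: undefined, max_tokens: 2048 }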
@@ -429,7 +429,7 @@ const ModelDropdown = ({
                     />
                   )}
                   <h6 className="font-medium capitalize text-[hsla(var(--text-secondary))]">
-                    {engine.name}
+                    {getTitleByEngine(engine.name)}
                   </h6>
                 </div>
                 <div className="-mr-2 flex gap-1">
@@ -475,7 +475,7 @@ const ModelDropdown = ({
                 >
                   <div className="flex items-center gap-2">
                     <p
-                      className="line-clamp-1 text-[hsla(var(--text-secondary))]"
+                      className="max-w-[200px] overflow-hidden truncate whitespace-nowrap text-[hsla(var(--text-secondary))]"
                       title={model.name}
                     >
                       {model.name}
@@ -549,6 +549,8 @@ const ModelDropdown = ({
                     (c) => c.id === model.id
                   )
                   return (
+                    <>
+                      {isDownloaded && (
                     <li
                       key={model.id}
                       className={twMerge(
@@ -558,7 +560,10 @@ const ModelDropdown = ({
                           : 'text-[hsla(var(--text-primary))]'
                       )}
                       onClick={() => {
-                        if (!isConfigured && engine.type === 'remote')
+                        if (
+                          !isConfigured &&
+                          engine.type === 'remote'
+                        )
                           return null
                         if (isDownloaded) {
                           onClickModelItem(model.id)
@@ -568,7 +573,7 @@ const ModelDropdown = ({
                       <div className="flex gap-x-2">
                         <p
                           className={twMerge(
-                            'line-clamp-1',
+                            'max-w-[200px] overflow-hidden truncate whitespace-nowrap',
                             !isDownloaded &&
                               'text-[hsla(var(--text-secondary))]'
                           )}
@@ -618,6 +623,8 @@ const ModelDropdown = ({
                         )}
                       </div>
                     </li>
+                      )}
+                    </>
                   )
                 })}
               </ul>
@@ -29,15 +29,20 @@ const ModelLabel = ({ size, compact }: Props) => {
   const { settings } = useSettings()
 
   const getLabel = (size: number) => {
-    const minimumRamModel = size * 1.25
-    const availableRam =
-      settings?.run_mode === 'gpu'
+    const minimumRamModel = (size * 1.25) / (1024 * 1024)
+    const availableRam = settings?.gpus?.some((gpu) => gpu.activated)
       ? availableVram * 1000000 // MB to bytes
-      : totalRam - usedRam + (activeModel?.metadata?.size ?? 0)
+      : totalRam -
+        (usedRam +
+          (activeModel?.metadata?.size
+            ? (activeModel.metadata.size * 1.25) / (1024 * 1024)
+            : 0))
 
     if (minimumRamModel > totalRam) {
       return (
         <NotEnoughMemoryLabel
-          unit={settings?.run_mode === 'gpu' ? 'VRAM' : 'RAM'}
+          unit={settings?.gpus?.some((gpu) => gpu.activated) ? 'VRAM' : 'RAM'}
           compact={compact}
         />
       )
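The unit fix above divides the byte-based model size by 1024 * 1024 before comparing it with the RAM counters, which implies totalRam and usedRam are tracked in MB. A worked example with an illustrative 4 GB model:

    // Worked example of the corrected unit math.
    const sizeBytes = 4 * 1024 * 1024 * 1024 // 4 GB model, illustrative
    const minimumRamModelMb = (sizeBytes * 1.25) / (1024 * 1024)
    console.log(minimumRamModelMb) // 5120, i.e. the model needs ~5 GB of headroom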
@@ -143,8 +143,7 @@ export default function ModelHandler() {
       return
     }
 
-    // The thread title should not be updated if the message is less than 10 words
-    // And no new line character is present
+    // No new line character is presented in the title
     // And non-alphanumeric characters should be removed
     if (messageContent.includes('\n')) {
       messageContent = messageContent.replace(/\n/g, ' ')
@@ -93,14 +93,8 @@ const ServerLogs = (props: ServerLogsProps) => {
   }, [listRef.current?.scrollHeight, isUserManuallyScrollingUp, logs])
 
   return (
-    <ScrollArea
-      ref={listRef}
-      className={twMerge(
-        'h-[calc(100%-49px)] w-full p-4 py-0',
-        logs.length === 0 && 'mx-auto'
-      )}
-      onScroll={handleScroll}
-    >
+    <>
+      <div>
       {withCopy && (
         <div className="absolute right-2 top-7">
           <div className="flex w-full flex-row gap-2">
@@ -140,6 +134,15 @@ const ServerLogs = (props: ServerLogsProps) => {
             </div>
           </div>
         )}
+      </div>
+      <ScrollArea
+        ref={listRef}
+        className={twMerge(
+          'h-[calc(100%-49px)] w-full p-4 py-0',
+          logs.length === 0 && 'mx-auto'
+        )}
+        onScroll={handleScroll}
+      >
       <div className="flex h-full w-full flex-col">
         {logs.length > 0 ? (
           <code className="inline-block max-w-[38vw] whitespace-break-spaces text-[13px] lg:max-w-[40vw] xl:max-w-[50vw]">
@@ -155,7 +158,7 @@ const ServerLogs = (props: ServerLogsProps) => {
           <div
             className={twMerge(
               'mt-24 flex w-full flex-col items-center justify-center',
-              withCopy && 'mt-0 py-2'
+              withCopy && 'mt-4 py-2'
             )}
           >
             <svg
@@ -287,11 +290,14 @@ const ServerLogs = (props: ServerLogsProps) => {
               </linearGradient>
             </defs>
           </svg>
-          <p className="text-[hsla(var(--text-secondary)] mt-4">Empty logs</p>
+          <p className="text-[hsla(var(--text-secondary)] mt-4">
+            Empty logs
+          </p>
         </div>
       )}
       </div>
     </ScrollArea>
+    </>
   )
 }
 
@@ -73,7 +73,7 @@ const SliderRightPanel = ({
       trigger={
         <Input
           type="text"
-          className="-mt-4 h-8 w-[60px]"
+          className="-mt-4 h-8 w-[68px]"
           min={min}
           max={max}
           value={val}
@@ -8,6 +8,8 @@ export const mainViewStateAtom = atom<MainViewState>(MainViewState.Thread)
 
 export const defaultJanDataFolderAtom = atom<string>('')
 
+export const LocalEngineDefaultVariantAtom = atom<string>('')
+
 const SHOW_RIGHT_PANEL = 'showRightPanel'
 
 // Store panel atom
Some files were not shown because too many files have changed in this diff.