Merge branch 'dev' into dev
This commit is contained in:
commit
dd68ba9d61
@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"name": "@janhq/inference-cortex-extension",
|
"name": "@janhq/inference-cortex-extension",
|
||||||
"productName": "Cortex Inference Engine",
|
"productName": "Cortex Inference Engine",
|
||||||
"version": "1.0.20",
|
"version": "1.0.21",
|
||||||
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
|
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"node": "dist/node/index.cjs.js",
|
"node": "dist/node/index.cjs.js",
|
||||||
|
|||||||
@ -0,0 +1,36 @@
|
|||||||
|
{
|
||||||
|
"sources": [
|
||||||
|
{
|
||||||
|
"filename": "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf",
|
||||||
|
"url": "https://huggingface.co/bartowski/Qwen2.5-Coder-14B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"id": "qwen2.5-coder-14b-instruct",
|
||||||
|
"object": "model",
|
||||||
|
"name": "Qwen2.5 Coder 14B Instruct Q4",
|
||||||
|
"version": "1.0",
|
||||||
|
"description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models. Significant improvements in code generation, code reasoning and code fixing.",
|
||||||
|
"format": "gguf",
|
||||||
|
"settings": {
|
||||||
|
"ctx_len": 32768,
|
||||||
|
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
|
||||||
|
"llama_model_path": "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf",
|
||||||
|
"ngl": 29
|
||||||
|
},
|
||||||
|
"parameters": {
|
||||||
|
"temperature": 0.7,
|
||||||
|
"top_p": 0.95,
|
||||||
|
"stream": true,
|
||||||
|
"max_tokens": 32768,
|
||||||
|
"stop": ["<|endoftext|>", "<|im_end|>"],
|
||||||
|
"frequency_penalty": 0,
|
||||||
|
"presence_penalty": 0
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"author": "QwenLM",
|
||||||
|
"tags": ["14B", "Featured"],
|
||||||
|
"size": 8990000000
|
||||||
|
},
|
||||||
|
"engine": "llama-cpp"
|
||||||
|
}
|
||||||
|
|
||||||
@ -0,0 +1,36 @@
|
|||||||
|
{
|
||||||
|
"sources": [
|
||||||
|
{
|
||||||
|
"filename": "Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf",
|
||||||
|
"url": "https://huggingface.co/bartowski/Qwen2.5-Coder-32B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"id": "qwen2.5-coder-32b-instruct",
|
||||||
|
"object": "model",
|
||||||
|
"name": "Qwen2.5 Coder 32B Instruct Q4",
|
||||||
|
"version": "1.0",
|
||||||
|
"description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models. Significant improvements in code generation, code reasoning and code fixing.",
|
||||||
|
"format": "gguf",
|
||||||
|
"settings": {
|
||||||
|
"ctx_len": 32768,
|
||||||
|
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
|
||||||
|
"llama_model_path": "Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf",
|
||||||
|
"ngl": 29
|
||||||
|
},
|
||||||
|
"parameters": {
|
||||||
|
"temperature": 0.7,
|
||||||
|
"top_p": 0.95,
|
||||||
|
"stream": true,
|
||||||
|
"max_tokens": 32768,
|
||||||
|
"stop": ["<|endoftext|>", "<|im_end|>"],
|
||||||
|
"frequency_penalty": 0,
|
||||||
|
"presence_penalty": 0
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"author": "QwenLM",
|
||||||
|
"tags": ["32B", "Featured"],
|
||||||
|
"size": 19900000000
|
||||||
|
},
|
||||||
|
"engine": "llama-cpp"
|
||||||
|
}
|
||||||
|
|
||||||
@ -49,6 +49,8 @@ const llama321bJson = require('./resources/models/llama3.2-1b-instruct/model.jso
|
|||||||
const llama323bJson = require('./resources/models/llama3.2-3b-instruct/model.json')
|
const llama323bJson = require('./resources/models/llama3.2-3b-instruct/model.json')
|
||||||
const qwen257bJson = require('./resources/models/qwen2.5-7b-instruct/model.json')
|
const qwen257bJson = require('./resources/models/qwen2.5-7b-instruct/model.json')
|
||||||
const qwen25coder7bJson = require('./resources/models/qwen2.5-coder-7b-instruct/model.json')
|
const qwen25coder7bJson = require('./resources/models/qwen2.5-coder-7b-instruct/model.json')
|
||||||
|
const qwen25coder14bJson = require('./resources/models/qwen2.5-coder-14b-instruct/model.json')
|
||||||
|
const qwen25coder32bJson = require('./resources/models/qwen2.5-coder-32b-instruct/model.json')
|
||||||
const qwen2514bJson = require('./resources/models/qwen2.5-14b-instruct/model.json')
|
const qwen2514bJson = require('./resources/models/qwen2.5-14b-instruct/model.json')
|
||||||
const qwen2532bJson = require('./resources/models/qwen2.5-32b-instruct/model.json')
|
const qwen2532bJson = require('./resources/models/qwen2.5-32b-instruct/model.json')
|
||||||
const qwen2572bJson = require('./resources/models/qwen2.5-72b-instruct/model.json')
|
const qwen2572bJson = require('./resources/models/qwen2.5-72b-instruct/model.json')
|
||||||
@ -108,6 +110,8 @@ export default [
|
|||||||
llama323bJson,
|
llama323bJson,
|
||||||
qwen257bJson,
|
qwen257bJson,
|
||||||
qwen25coder7bJson,
|
qwen25coder7bJson,
|
||||||
|
qwen25coder14bJson,
|
||||||
|
qwen25coder32bJson,
|
||||||
qwen2514bJson,
|
qwen2514bJson,
|
||||||
qwen2532bJson,
|
qwen2532bJson,
|
||||||
qwen2572bJson,
|
qwen2572bJson,
|
||||||
@ -115,6 +119,7 @@ export default [
|
|||||||
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
|
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
|
||||||
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
|
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
|
||||||
CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
|
CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
|
||||||
|
CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
|
||||||
}),
|
}),
|
||||||
// Allow json resolution
|
// Allow json resolution
|
||||||
json(),
|
json(),
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
declare const NODE: string
|
declare const NODE: string
|
||||||
declare const CORTEX_API_URL: string
|
declare const CORTEX_API_URL: string
|
||||||
|
declare const CORTEX_SOCKET_URL: string
|
||||||
declare const DEFAULT_SETTINGS: Array<any>
|
declare const DEFAULT_SETTINGS: Array<any>
|
||||||
declare const MODELS: Array<any>
|
declare const MODELS: Array<any>
|
||||||
|
|
||||||
|
|||||||
@ -16,17 +16,29 @@ import {
|
|||||||
getJanDataFolderPath,
|
getJanDataFolderPath,
|
||||||
extractModelLoadParams,
|
extractModelLoadParams,
|
||||||
fs,
|
fs,
|
||||||
|
events,
|
||||||
|
ModelEvent
|
||||||
} from '@janhq/core'
|
} from '@janhq/core'
|
||||||
import PQueue from 'p-queue'
|
import PQueue from 'p-queue'
|
||||||
import ky from 'ky'
|
import ky from 'ky'
|
||||||
|
|
||||||
|
/**
 * Download event names published through `events.emit`.
 *
 * The member name is the key used to look an entry up
 * (`DownloadTypes[type]`); the member value is the event name that is
 * actually emitted.
 */
enum DownloadTypes {
  DownloadUpdated = 'onFileDownloadUpdate',
  DownloadError = 'onFileDownloadError',
  DownloadSuccess = 'onFileDownloadSuccess',
  DownloadStopped = 'onFileDownloadStopped',
  DownloadStarted = 'onFileDownloadStarted',
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A class that implements the InferenceExtension interface from the @janhq/core package.
|
* A class that implements the InferenceExtension interface from the @janhq/core package.
|
||||||
* The class provides methods for initializing and stopping a model, and for making inference requests.
|
* The class provides methods for initializing and stopping a model, and for making inference requests.
|
||||||
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
|
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
|
||||||
*/
|
*/
|
||||||
export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
||||||
// DEPRECATED
|
|
||||||
nodeModule: string = 'node'
|
nodeModule: string = 'node'
|
||||||
|
|
||||||
queue = new PQueue({ concurrency: 1 })
|
queue = new PQueue({ concurrency: 1 })
|
||||||
@ -38,6 +50,11 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
|||||||
*/
|
*/
|
||||||
inferenceUrl = `${CORTEX_API_URL}/v1/chat/completions`
|
inferenceUrl = `${CORTEX_API_URL}/v1/chat/completions`
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Socket instance of events subscription
|
||||||
|
*/
|
||||||
|
socket?: WebSocket = undefined
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Subscribes to events emitted by the @janhq/core package.
|
* Subscribes to events emitted by the @janhq/core package.
|
||||||
*/
|
*/
|
||||||
@ -55,6 +72,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
|||||||
|
|
||||||
this.queue.add(() => this.healthz())
|
this.queue.add(() => this.healthz())
|
||||||
|
|
||||||
|
this.subscribeToEvents()
|
||||||
|
|
||||||
window.addEventListener('beforeunload', () => {
|
window.addEventListener('beforeunload', () => {
|
||||||
this.clean()
|
this.clean()
|
||||||
})
|
})
|
||||||
@ -138,7 +157,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
|||||||
methods: ['get'],
|
methods: ['get'],
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
.then(() => {})
|
.then(() => { })
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -154,6 +173,50 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
|||||||
// Do nothing
|
// Do nothing
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Subscribe to cortex.cpp websocket events.
 *
 * Queues a task that opens a WebSocket to `${CORTEX_SOCKET_URL}/events`,
 * stores it on `this.socket`, and attaches a `message` listener. Each message
 * is parsed as JSON and re-emitted through `events.emit` with the task id,
 * the aggregate progress ratio (0 when the total is 0) and the
 * transferred/total byte counts summed over `task.items`.
 *
 * When a message reports a successful download, `ModelEvent.OnModelsUpdate`
 * is emitted after a 500 ms delay.
 *
 * The queued task resolves as soon as the listener is attached; it does not
 * wait for the connection to open.
 */
subscribeToEvents() {
  this.queue.add(
    () =>
      new Promise<void>((resolve) => {
        this.socket = new WebSocket(`${CORTEX_SOCKET_URL}/events`)

        this.socket.addEventListener('message', (event) => {
          let data: any
          try {
            data = JSON.parse(event.data)
          } catch (error) {
            // A malformed frame must not throw inside the listener
            console.error('Failed to parse cortex.cpp event:', error)
            return
          }

          // Frames without a task/items payload carry nothing to report
          if (!data || !data.task || !Array.isArray(data.task.items)) return

          const transferred = data.task.items.reduce(
            (acc: number, cur: any) => acc + cur.downloadedBytes,
            0
          )
          const total = data.task.items.reduce(
            (acc: number, cur: any) => acc + cur.bytes,
            0
          )
          const percent = total > 0 ? transferred / total : 0

          // `data.type` is used as the enum member name; the enum value is the
          // event name to emit.
          const eventName =
            DownloadTypes[data.type as keyof typeof DownloadTypes]

          events.emit(eventName, {
            modelId: data.task.id,
            percent: percent,
            size: {
              transferred: transferred,
              total: total,
            },
          })

          // Update models list from Hub.
          // Compare the resolved event name: `data.type` holds the member
          // name, so comparing it to the enum value alone never matches.
          // The raw comparison is kept as well for backward compatibility.
          if (
            eventName === DownloadTypes.DownloadSuccess ||
            data.type === DownloadTypes.DownloadSuccess
          ) {
            // Delay for the state update from cortex.cpp
            // Just to be sure
            setTimeout(() => {
              events.emit(ModelEvent.OnModelsUpdate, {})
            }, 500)
          }
        })
        resolve()
      })
  )
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Legacy
|
/// Legacy
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
import PQueue from 'p-queue'
|
import PQueue from 'p-queue'
|
||||||
import ky from 'ky'
|
import ky from 'ky'
|
||||||
import { events, extractModelLoadParams, Model, ModelEvent } from '@janhq/core'
|
import { extractModelLoadParams, Model } from '@janhq/core'
|
||||||
import { extractInferenceParams } from '@janhq/core'
|
import { extractInferenceParams } from '@janhq/core'
|
||||||
/**
|
/**
|
||||||
* cortex.cpp Model APIs interface
|
* cortex.cpp Model APIs interface
|
||||||
@ -24,21 +24,11 @@ type ModelList = {
|
|||||||
data: any[]
|
data: any[]
|
||||||
}
|
}
|
||||||
|
|
||||||
enum DownloadTypes {
|
|
||||||
DownloadUpdated = 'onFileDownloadUpdate',
|
|
||||||
DownloadError = 'onFileDownloadError',
|
|
||||||
DownloadSuccess = 'onFileDownloadSuccess',
|
|
||||||
DownloadStopped = 'onFileDownloadStopped',
|
|
||||||
DownloadStarted = 'onFileDownloadStarted',
|
|
||||||
}
|
|
||||||
|
|
||||||
export class CortexAPI implements ICortexAPI {
|
export class CortexAPI implements ICortexAPI {
|
||||||
queue = new PQueue({ concurrency: 1 })
|
queue = new PQueue({ concurrency: 1 })
|
||||||
socket?: WebSocket = undefined
|
|
||||||
|
|
||||||
constructor() {
|
constructor() {
|
||||||
this.queue.add(() => this.healthz())
|
this.queue.add(() => this.healthz())
|
||||||
this.subscribeToEvents()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -172,49 +162,6 @@ export class CortexAPI implements ICortexAPI {
|
|||||||
.then(() => {})
|
.then(() => {})
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Subscribe to cortex.cpp websocket events
|
|
||||||
*/
|
|
||||||
subscribeToEvents() {
|
|
||||||
this.queue.add(
|
|
||||||
() =>
|
|
||||||
new Promise<void>((resolve) => {
|
|
||||||
this.socket = new WebSocket(`${SOCKET_URL}/events`)
|
|
||||||
|
|
||||||
this.socket.addEventListener('message', (event) => {
|
|
||||||
const data = JSON.parse(event.data)
|
|
||||||
const transferred = data.task.items.reduce(
|
|
||||||
(acc, cur) => acc + cur.downloadedBytes,
|
|
||||||
0
|
|
||||||
)
|
|
||||||
const total = data.task.items.reduce(
|
|
||||||
(acc, cur) => acc + cur.bytes,
|
|
||||||
0
|
|
||||||
)
|
|
||||||
const percent = total > 0 ? transferred / total : 0
|
|
||||||
|
|
||||||
events.emit(DownloadTypes[data.type], {
|
|
||||||
modelId: data.task.id,
|
|
||||||
percent: percent,
|
|
||||||
size: {
|
|
||||||
transferred: transferred,
|
|
||||||
total: total,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
// Update models list from Hub
|
|
||||||
if (data.type === DownloadTypes.DownloadSuccess) {
|
|
||||||
// Delay for the state update from cortex.cpp
|
|
||||||
// Just to be sure
|
|
||||||
setTimeout(() => {
|
|
||||||
events.emit(ModelEvent.OnModelsUpdate, {})
|
|
||||||
}, 500)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
resolve()
|
|
||||||
})
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Transform model to the expected format (e.g. parameters, settings, metadata)
|
* Transform model to the expected format (e.g. parameters, settings, metadata)
|
||||||
* @param model
|
* @param model
|
||||||
|
|||||||
@ -79,7 +79,7 @@ const SystemMonitor = () => {
|
|||||||
{showSystemMonitorPanel && (
|
{showSystemMonitorPanel && (
|
||||||
<div
|
<div
|
||||||
className={twMerge(
|
className={twMerge(
|
||||||
'fixed bottom-9 left-[49px] z-50 flex h-[200px] w-[calc(100%-48px)] flex-shrink-0 flex-col border-t border-[hsla(var(--app-border))] bg-[hsla(var(--app-bg))]',
|
'system-monitor-panel fixed bottom-9 left-[49px] z-50 flex w-[calc(100%-48px)] flex-shrink-0 flex-col border-t border-[hsla(var(--app-border))] bg-[hsla(var(--app-bg))]',
|
||||||
showFullScreen && 'h-[calc(100%-63px)]',
|
showFullScreen && 'h-[calc(100%-63px)]',
|
||||||
reduceTransparent && 'w-[calc(100%-48px)] rounded-none'
|
reduceTransparent && 'w-[calc(100%-48px)] rounded-none'
|
||||||
)}
|
)}
|
||||||
@ -147,7 +147,6 @@ const SystemMonitor = () => {
|
|||||||
<span className="flex-shrink-0 ">{ramUtilitized}%</span>
|
<span className="flex-shrink-0 ">{ramUtilitized}%</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{gpus.length > 0 && (
|
{gpus.length > 0 && (
|
||||||
<div className="mb-4 border-b border-[hsla(var(--app-border))] pb-4 last:border-none">
|
<div className="mb-4 border-b border-[hsla(var(--app-border))] pb-4 last:border-none">
|
||||||
{gpus.map((gpu, index) => {
|
{gpus.map((gpu, index) => {
|
||||||
|
|||||||
@ -189,7 +189,7 @@ const Advanced = () => {
|
|||||||
* @param gpuId
|
* @param gpuId
|
||||||
* @returns
|
* @returns
|
||||||
*/
|
*/
|
||||||
const handleGPUChange = (gpuId: string) => {
|
const handleGPUChange = async (gpuId: string) => {
|
||||||
let updatedGpusInUse = [...gpusInUse]
|
let updatedGpusInUse = [...gpusInUse]
|
||||||
if (updatedGpusInUse.includes(gpuId)) {
|
if (updatedGpusInUse.includes(gpuId)) {
|
||||||
updatedGpusInUse = updatedGpusInUse.filter((id) => id !== gpuId)
|
updatedGpusInUse = updatedGpusInUse.filter((id) => id !== gpuId)
|
||||||
@ -208,7 +208,7 @@ const Advanced = () => {
|
|||||||
updatedGpusInUse.push(gpuId)
|
updatedGpusInUse.push(gpuId)
|
||||||
}
|
}
|
||||||
setGpusInUse(updatedGpusInUse)
|
setGpusInUse(updatedGpusInUse)
|
||||||
saveSettings({ gpusInUse: updatedGpusInUse })
|
await saveSettings({ gpusInUse: updatedGpusInUse })
|
||||||
window.core?.api?.relaunch()
|
window.core?.api?.relaunch()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -306,8 +306,13 @@ const Advanced = () => {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
// Stop any running model to apply the changes
|
// Stop any running model to apply the changes
|
||||||
if (e.target.checked !== gpuEnabled)
|
if (e.target.checked !== gpuEnabled) {
|
||||||
stopModel().then(() => window.core?.api?.relaunch())
|
stopModel().finally(() => {
|
||||||
|
setTimeout(() => {
|
||||||
|
window.location.reload()
|
||||||
|
}, 300)
|
||||||
|
})
|
||||||
|
}
|
||||||
}}
|
}}
|
||||||
/>
|
/>
|
||||||
}
|
}
|
||||||
|
|||||||
@ -147,6 +147,20 @@ const ThreadCenterPanel = () => {
|
|||||||
|
|
||||||
const showSystemMonitorPanel = useAtomValue(showSystemMonitorPanelAtom)
|
const showSystemMonitorPanel = useAtomValue(showSystemMonitorPanelAtom)
|
||||||
|
|
||||||
|
const [height, setHeight] = useState<number>(0)
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (showSystemMonitorPanel) {
|
||||||
|
const element = document.querySelector('.system-monitor-panel')
|
||||||
|
|
||||||
|
if (element) {
|
||||||
|
setHeight(element.clientHeight) // You can also use offsetHeight if needed
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
setHeight(0)
|
||||||
|
}
|
||||||
|
}, [showSystemMonitorPanel])
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<CenterPanelContainer>
|
<CenterPanelContainer>
|
||||||
<div
|
<div
|
||||||
@ -193,9 +207,10 @@ const ThreadCenterPanel = () => {
|
|||||||
)}
|
)}
|
||||||
<div
|
<div
|
||||||
className={twMerge(
|
className={twMerge(
|
||||||
'flex h-full w-full flex-col justify-between',
|
'flex h-full w-full flex-col justify-between'
|
||||||
showSystemMonitorPanel && 'h-[calc(100%-200px)]'
|
// showSystemMonitorPanel && `h-[calc(100%-${height}px)]`
|
||||||
)}
|
)}
|
||||||
|
style={{ height: `calc(100% - ${height}px)` }}
|
||||||
>
|
>
|
||||||
{activeThread ? (
|
{activeThread ? (
|
||||||
<div className="flex h-full w-full overflow-x-hidden">
|
<div className="flex h-full w-full overflow-x-hidden">
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user