Merge branch 'dev' into dev
This commit is contained in:
commit dd68ba9d61
@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-cortex-extension",
   "productName": "Cortex Inference Engine",
-  "version": "1.0.20",
+  "version": "1.0.21",
   "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",
@@ -0,0 +1,36 @@
+{
+  "sources": [
+    {
+      "filename": "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/Qwen2.5-Coder-14B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf"
+    }
+  ],
+  "id": "qwen2.5-coder-14b-instruct",
+  "object": "model",
+  "name": "Qwen2.5 Coder 14B Instruct Q4",
+  "version": "1.0",
+  "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models. Significant improvements in code generation, code reasoning and code fixing.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 32768,
+    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
+    "llama_model_path": "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf",
+    "ngl": 29
+  },
+  "parameters": {
+    "temperature": 0.7,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 32768,
+    "stop": ["<|endoftext|>", "<|im_end|>"],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "QwenLM",
+    "tags": ["14B", "Featured"],
+    "size": 8990000000
+  },
+  "engine": "llama-cpp"
+}
@@ -0,0 +1,36 @@
+{
+  "sources": [
+    {
+      "filename": "Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/Qwen2.5-Coder-32B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf"
+    }
+  ],
+  "id": "qwen2.5-coder-32b-instruct",
+  "object": "model",
+  "name": "Qwen2.5 Coder 32B Instruct Q4",
+  "version": "1.0",
+  "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models. Significant improvements in code generation, code reasoning and code fixing.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 32768,
+    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
+    "llama_model_path": "Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf",
+    "ngl": 29
+  },
+  "parameters": {
+    "temperature": 0.7,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 32768,
+    "stop": ["<|endoftext|>", "<|im_end|>"],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "QwenLM",
+    "tags": ["32B", "Featured"],
+    "size": 19900000000
+  },
+  "engine": "llama-cpp"
+}
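Both new model.json files share one descriptor layout. For orientation, a minimal TypeScript sketch of that shape; the ModelDescriptor name and the field comments are illustrative, not the extension's actual types (@janhq/core defines its own Model type):

// Illustrative descriptor type mirroring the two model.json files above.
interface ModelDescriptor {
  sources: { filename: string; url: string }[]
  id: string
  object: 'model'
  name: string
  version: string
  description: string
  format: 'gguf'
  settings: {
    ctx_len: number          // context window, in tokens
    prompt_template: string  // ChatML template used by Qwen2.5
    llama_model_path: string // GGUF file matching sources[].filename
    ngl: number              // number of layers to offload to the GPU
  }
  parameters: Record<string, unknown> // sampling defaults (temperature, top_p, ...)
  metadata: { author: string; tags: string[]; size: number }
  engine: 'llama-cpp'
}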
@@ -49,6 +49,8 @@ const llama321bJson = require('./resources/models/llama3.2-1b-instruct/model.json')
 const llama323bJson = require('./resources/models/llama3.2-3b-instruct/model.json')
 const qwen257bJson = require('./resources/models/qwen2.5-7b-instruct/model.json')
 const qwen25coder7bJson = require('./resources/models/qwen2.5-coder-7b-instruct/model.json')
+const qwen25coder14bJson = require('./resources/models/qwen2.5-coder-14b-instruct/model.json')
+const qwen25coder32bJson = require('./resources/models/qwen2.5-coder-32b-instruct/model.json')
 const qwen2514bJson = require('./resources/models/qwen2.5-14b-instruct/model.json')
 const qwen2532bJson = require('./resources/models/qwen2.5-32b-instruct/model.json')
 const qwen2572bJson = require('./resources/models/qwen2.5-72b-instruct/model.json')
@@ -108,6 +110,8 @@ export default [
   llama323bJson,
   qwen257bJson,
   qwen25coder7bJson,
+  qwen25coder14bJson,
+  qwen25coder32bJson,
   qwen2514bJson,
   qwen2532bJson,
   qwen2572bJson,
@@ -115,6 +119,7 @@ export default [
       NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
       DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
       CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
+      CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
     }),
     // Allow json resolution
     json(),
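CORTEX_API_URL and CORTEX_SOCKET_URL are baked into the bundle at build time. A minimal sketch of that pattern, assuming @rollup/plugin-replace is the plugin behind this config (the plugin itself is not visible in the hunk):

// Sketch: inject compile-time constants so `CORTEX_SOCKET_URL` exists
// as a bare identifier in the bundled extension code.
import replace from '@rollup/plugin-replace'

export default {
  plugins: [
    replace({
      preventAssignment: true, // don't rewrite left-hand sides of assignments
      CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
      CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
    }),
  ],
}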
@@ -1,5 +1,6 @@
 declare const NODE: string
 declare const CORTEX_API_URL: string
+declare const CORTEX_SOCKET_URL: string
 declare const DEFAULT_SETTINGS: Array<any>
 declare const MODELS: Array<any>
@@ -16,17 +16,29 @@ import {
   getJanDataFolderPath,
   extractModelLoadParams,
   fs,
+  events,
+  ModelEvent
 } from '@janhq/core'
 import PQueue from 'p-queue'
 import ky from 'ky'
 
+/**
+ * Event subscription types of Downloader
+ */
+enum DownloadTypes {
+  DownloadUpdated = 'onFileDownloadUpdate',
+  DownloadError = 'onFileDownloadError',
+  DownloadSuccess = 'onFileDownloadSuccess',
+  DownloadStopped = 'onFileDownloadStopped',
+  DownloadStarted = 'onFileDownloadStarted',
+}
+
 /**
  * A class that implements the InferenceExtension interface from the @janhq/core package.
  * The class provides methods for initializing and stopping a model, and for making inference requests.
  * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
  */
 export default class JanInferenceCortexExtension extends LocalOAIEngine {
   // DEPRECATED
   nodeModule: string = 'node'
 
   queue = new PQueue({ concurrency: 1 })
@@ -38,6 +50,11 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
    */
   inferenceUrl = `${CORTEX_API_URL}/v1/chat/completions`
 
+  /**
+   * Socket instance of events subscription
+   */
+  socket?: WebSocket = undefined
+
   /**
    * Subscribes to events emitted by the @janhq/core package.
    */
@@ -55,6 +72,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
 
     this.queue.add(() => this.healthz())
 
+    this.subscribeToEvents()
+
     window.addEventListener('beforeunload', () => {
       this.clean()
     })
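Because the queue is created with concurrency: 1, queued tasks run strictly one at a time, so the healthz probe queued above settles before the websocket subscription queued inside subscribeToEvents() runs. A standalone sketch of that ordering guarantee, with placeholder tasks standing in for the extension's real methods:

import PQueue from 'p-queue'

// concurrency: 1 turns the queue into a strict FIFO pipeline:
// task B starts only after task A's promise settles.
const queue = new PQueue({ concurrency: 1 })

queue.add(() => waitForServer())   // e.g. the healthz poll
queue.add(() => openEventSocket()) // runs only once the server is reachable

// Placeholders for illustration only.
async function waitForServer(): Promise<void> { /* poll until the server responds */ }
async function openEventSocket(): Promise<void> { /* new WebSocket(...) */ }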
@@ -138,7 +157,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
         methods: ['get'],
       },
     })
-      .then(() => {})
+      .then(() => { })
   }
 
   /**
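The healthz call above leans on ky's built-in retry handling, restricted to idempotent GETs. A minimal sketch of such a readiness probe; the URL and retry limit are assumptions, as only the methods: ['get'] option is visible in the hunk:

import ky from 'ky'

// Poll the local cortex.cpp server until it responds; ky retries
// failed GET requests automatically with backoff.
async function healthz(): Promise<void> {
  await ky.get('http://127.0.0.1:39291/healthz', {
    retry: {
      limit: 10,        // illustrative; the real limit is not shown above
      methods: ['get'], // retry only idempotent GET requests
    },
  })
}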
@@ -154,6 +173,50 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
         // Do nothing
       })
   }
 
+  /**
+   * Subscribe to cortex.cpp websocket events
+   */
+  subscribeToEvents() {
+    this.queue.add(
+      () =>
+        new Promise<void>((resolve) => {
+          this.socket = new WebSocket(`${CORTEX_SOCKET_URL}/events`)
+
+          this.socket.addEventListener('message', (event) => {
+            const data = JSON.parse(event.data)
+            const transferred = data.task.items.reduce(
+              (acc: number, cur: any) => acc + cur.downloadedBytes,
+              0
+            )
+            const total = data.task.items.reduce(
+              (acc: number, cur: any) => acc + cur.bytes,
+              0
+            )
+            const percent = total > 0 ? transferred / total : 0
+
+            events.emit(DownloadTypes[data.type as keyof typeof DownloadTypes], {
+              modelId: data.task.id,
+              percent: percent,
+              size: {
+                transferred: transferred,
+                total: total,
+              },
+            })
+            // Update models list from Hub
+            if (data.type === DownloadTypes.DownloadSuccess) {
+              // Delay for the state update from cortex.cpp
+              // Just to be sure
+              setTimeout(() => {
+                events.emit(ModelEvent.OnModelsUpdate, {})
+              }, 500)
+            }
+          })
+          resolve()
+        })
+    )
+  }
+
 }
 
 /// Legacy
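Each websocket message carries a task whose items report per-file byte counts, and the handler folds them into a single progress figure. A standalone sketch of that aggregation; the payload type is an assumption inferred from the handler above:

// Assumed shape of a cortex.cpp download event task, per the handler above.
type DownloadTask = {
  id: string
  items: { downloadedBytes: number; bytes: number }[]
}

function progress(task: DownloadTask) {
  const transferred = task.items.reduce((acc, cur) => acc + cur.downloadedBytes, 0)
  const total = task.items.reduce((acc, cur) => acc + cur.bytes, 0)
  // Guard against division by zero before any file sizes are known.
  return { transferred, total, percent: total > 0 ? transferred / total : 0 }
}

// Example: two files, one finished and one halfway.
console.log(
  progress({
    id: 'qwen2.5-coder-14b-instruct',
    items: [
      { downloadedBytes: 100, bytes: 100 },
      { downloadedBytes: 50, bytes: 100 },
    ],
  })
) // { transferred: 150, total: 200, percent: 0.75 }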
@@ -1,6 +1,6 @@
 import PQueue from 'p-queue'
 import ky from 'ky'
-import { events, extractModelLoadParams, Model, ModelEvent } from '@janhq/core'
+import { extractModelLoadParams, Model } from '@janhq/core'
 import { extractInferenceParams } from '@janhq/core'
 /**
  * cortex.cpp Model APIs interface
@@ -24,21 +24,11 @@ type ModelList = {
   data: any[]
 }
 
-enum DownloadTypes {
-  DownloadUpdated = 'onFileDownloadUpdate',
-  DownloadError = 'onFileDownloadError',
-  DownloadSuccess = 'onFileDownloadSuccess',
-  DownloadStopped = 'onFileDownloadStopped',
-  DownloadStarted = 'onFileDownloadStarted',
-}
-
 export class CortexAPI implements ICortexAPI {
   queue = new PQueue({ concurrency: 1 })
-  socket?: WebSocket = undefined
 
   constructor() {
     this.queue.add(() => this.healthz())
-    this.subscribeToEvents()
   }
 
   /**
@@ -172,49 +162,6 @@ export class CortexAPI implements ICortexAPI {
       .then(() => {})
   }
 
-  /**
-   * Subscribe to cortex.cpp websocket events
-   */
-  subscribeToEvents() {
-    this.queue.add(
-      () =>
-        new Promise<void>((resolve) => {
-          this.socket = new WebSocket(`${SOCKET_URL}/events`)
-
-          this.socket.addEventListener('message', (event) => {
-            const data = JSON.parse(event.data)
-            const transferred = data.task.items.reduce(
-              (acc, cur) => acc + cur.downloadedBytes,
-              0
-            )
-            const total = data.task.items.reduce(
-              (acc, cur) => acc + cur.bytes,
-              0
-            )
-            const percent = total > 0 ? transferred / total : 0
-
-            events.emit(DownloadTypes[data.type], {
-              modelId: data.task.id,
-              percent: percent,
-              size: {
-                transferred: transferred,
-                total: total,
-              },
-            })
-            // Update models list from Hub
-            if (data.type === DownloadTypes.DownloadSuccess) {
-              // Delay for the state update from cortex.cpp
-              // Just to be sure
-              setTimeout(() => {
-                events.emit(ModelEvent.OnModelsUpdate, {})
-              }, 500)
-            }
-          })
-          resolve()
-        })
-    )
-  }
-
   /**
    * Transform model to the expected format (e.g. parameters, settings, metadata)
    * @param model
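The relocated handler also tightens the enum lookup: the removed code indexed the enum with a plain string, which only compiles with implicit any allowed, while the new code narrows the key first. A minimal illustration of the difference:

enum DownloadTypes {
  DownloadSuccess = 'onFileDownloadSuccess',
}

declare const data: { type: string }

// events.emit(DownloadTypes[data.type], ...)
// Errors under noImplicitAny: a plain `string` cannot index a string enum.

// The relocated handler narrows the index first, so the lookup type-checks:
const eventName = DownloadTypes[data.type as keyof typeof DownloadTypes]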
@@ -79,7 +79,7 @@ const SystemMonitor = () => {
       {showSystemMonitorPanel && (
         <div
           className={twMerge(
-            'fixed bottom-9 left-[49px] z-50 flex h-[200px] w-[calc(100%-48px)] flex-shrink-0 flex-col border-t border-[hsla(var(--app-border))] bg-[hsla(var(--app-bg))]',
+            'system-monitor-panel fixed bottom-9 left-[49px] z-50 flex w-[calc(100%-48px)] flex-shrink-0 flex-col border-t border-[hsla(var(--app-border))] bg-[hsla(var(--app-bg))]',
             showFullScreen && 'h-[calc(100%-63px)]',
             reduceTransparent && 'w-[calc(100%-48px)] rounded-none'
           )}
@@ -147,7 +147,6 @@ const SystemMonitor = () => {
             <span className="flex-shrink-0 ">{ramUtilitized}%</span>
           </div>
         </div>
-
         {gpus.length > 0 && (
           <div className="mb-4 border-b border-[hsla(var(--app-border))] pb-4 last:border-none">
             {gpus.map((gpu, index) => {
@@ -189,7 +189,7 @@ const Advanced = () => {
    * @param gpuId
    * @returns
    */
-  const handleGPUChange = (gpuId: string) => {
+  const handleGPUChange = async (gpuId: string) => {
     let updatedGpusInUse = [...gpusInUse]
     if (updatedGpusInUse.includes(gpuId)) {
       updatedGpusInUse = updatedGpusInUse.filter((id) => id !== gpuId)
@@ -208,7 +208,7 @@ const Advanced = () => {
       updatedGpusInUse.push(gpuId)
     }
     setGpusInUse(updatedGpusInUse)
-    saveSettings({ gpusInUse: updatedGpusInUse })
+    await saveSettings({ gpusInUse: updatedGpusInUse })
     window.core?.api?.relaunch()
   }
@@ -306,8 +306,13 @@ const Advanced = () => {
               })
             }
             // Stop any running model to apply the changes
-            if (e.target.checked !== gpuEnabled)
-              stopModel().then(() => window.core?.api?.relaunch())
+            if (e.target.checked !== gpuEnabled) {
+              stopModel().finally(() => {
+                setTimeout(() => {
+                  window.location.reload()
+                }, 300)
+              })
+            }
           }}
         />
 }
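Two behavior changes land in this hunk: the window now reloads instead of the process relaunching, and .finally() replaces .then(), so the reload fires even if stopping the model rejects. A sketch of the pattern; stopModel here is a stand-in for the real call:

// Stand-in for the extension call that stops the running model.
declare function stopModel(): Promise<void>

function applyGpuToggle() {
  // .finally() runs on success *and* failure, so a failed stop no longer
  // strands the app without the reload that applies the new GPU setting.
  stopModel().finally(() => {
    // Small grace period so in-flight teardown can settle first.
    setTimeout(() => window.location.reload(), 300)
  })
}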
@@ -147,6 +147,20 @@ const ThreadCenterPanel = () => {
   const showSystemMonitorPanel = useAtomValue(showSystemMonitorPanelAtom)
 
+  const [height, setHeight] = useState<number>(0)
+
+  useEffect(() => {
+    if (showSystemMonitorPanel) {
+      const element = document.querySelector('.system-monitor-panel')
+
+      if (element) {
+        setHeight(element.clientHeight) // You can also use offsetHeight if needed
+      }
+    } else {
+      setHeight(0)
+    }
+  }, [showSystemMonitorPanel])
+
   return (
     <CenterPanelContainer>
       <div
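This effect samples the panel's height once per show/hide toggle. If the panel can resize while open (the SystemMonitor hunk above toggles a full-screen height class), a ResizeObserver-based hook would track that too; an alternative sketch, not what this commit implements:

import { useEffect, useState } from 'react'

// Alternative sketch: observe the panel's size continuously instead of
// sampling it once when the panel is shown.
function usePanelHeight(selector: string, active: boolean): number {
  const [height, setHeight] = useState(0)

  useEffect(() => {
    if (!active) {
      setHeight(0)
      return
    }
    const element = document.querySelector(selector)
    if (!element) return

    const observer = new ResizeObserver(() => setHeight(element.clientHeight))
    observer.observe(element)
    setHeight(element.clientHeight) // initial measurement
    return () => observer.disconnect()
  }, [selector, active])

  return height
}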
@@ -193,9 +207,10 @@ const ThreadCenterPanel = () => {
           )}
           <div
             className={twMerge(
-              'flex h-full w-full flex-col justify-between',
-              showSystemMonitorPanel && 'h-[calc(100%-200px)]'
+              'flex h-full w-full flex-col justify-between'
+              // showSystemMonitorPanel && `h-[calc(100%-${height}px)]`
             )}
+            style={{ height: `calc(100% - ${height}px)` }}
           >
             {activeThread ? (
               <div className="flex h-full w-full overflow-x-hidden">
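The commented-out class hints at why the inline style replaces the Tailwind utility: Tailwind emits arbitrary-value classes like h-[calc(100%-200px)] by scanning source for literal class strings, so a runtime-interpolated h-[calc(100%-${height}px)] never gets a matching rule. Illustrative TSX fragments:

// Works: Tailwind sees the literal class string at build time.
const Works = () => <div className="h-[calc(100%-200px)]" />

// Silently broken: the class string is built at runtime, so Tailwind's
// scanner never emits a matching rule in the stylesheet.
const Broken = ({ height }: { height: number }) => (
  <div className={`h-[calc(100%-${height}px)]`} />
)

// Reliable for runtime values: a plain inline style, as the hunk above does.
const Reliable = ({ height }: { height: number }) => (
  <div style={{ height: `calc(100% - ${height}px)` }} />
)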