Merge pull request #3998 from janhq/chore/add-qwen-coder-models

chore: add qwen2.5-coder 14B and 32B models
Louis 2024-11-12 16:39:43 +07:00 committed by GitHub
commit ff2a81e41f
8 changed files with 154 additions and 61 deletions

View File

@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-cortex-extension",
   "productName": "Cortex Inference Engine",
-  "version": "1.0.20",
+  "version": "1.0.21",
   "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",

View File

@@ -0,0 +1,36 @@
{
  "sources": [
    {
      "filename": "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf",
      "url": "https://huggingface.co/bartowski/Qwen2.5-Coder-14B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf"
    }
  ],
  "id": "qwen2.5-coder-14b-instruct",
  "object": "model",
  "name": "Qwen2.5 Coder 14B Instruct Q4",
  "version": "1.0",
  "description": "Qwen2.5-Coder is the latest series of code-specific Qwen large language models, with significant improvements in code generation, code reasoning and code fixing.",
  "format": "gguf",
  "settings": {
    "ctx_len": 32768,
    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
    "llama_model_path": "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf",
    "ngl": 29
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 32768,
    "stop": ["<|endoftext|>", "<|im_end|>"],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "QwenLM",
    "tags": ["14B", "Featured"],
    "size": 8990000000
  },
  "engine": "llama-cpp"
}
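
Both new manifests (this 14B one and the 32B one below) share the same layout: download sources, llama.cpp load settings, and default inference parameters. As an orientation aid, a rough TypeScript shape for the fields used here (illustrative names only, not Jan's actual type definitions):

// Illustrative shape of the model.json manifests added in this PR.
interface ModelManifest {
  sources: { filename: string; url: string }[] // GGUF download locations
  id: string
  object: 'model'
  name: string
  version: string
  description: string
  format: 'gguf'
  settings: {
    ctx_len: number // context window in tokens
    prompt_template: string // ChatML template used by Qwen models
    llama_model_path: string // GGUF file to load
    ngl: number // number of layers offloaded to the GPU
  }
  parameters: {
    temperature: number
    top_p: number
    stream: boolean
    max_tokens: number
    stop: string[]
    frequency_penalty: number
    presence_penalty: number
  }
  metadata: { author: string; tags: string[]; size: number } // size in bytes
  engine: 'llama-cpp'
}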

View File

@@ -0,0 +1,36 @@
{
  "sources": [
    {
      "filename": "Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf",
      "url": "https://huggingface.co/bartowski/Qwen2.5-Coder-32B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf"
    }
  ],
  "id": "qwen2.5-coder-32b-instruct",
  "object": "model",
  "name": "Qwen2.5 Coder 32B Instruct Q4",
  "version": "1.0",
  "description": "Qwen2.5-Coder is the latest series of code-specific Qwen large language models, with significant improvements in code generation, code reasoning and code fixing.",
  "format": "gguf",
  "settings": {
    "ctx_len": 32768,
    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
    "llama_model_path": "Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf",
    "ngl": 29
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 32768,
    "stop": ["<|endoftext|>", "<|im_end|>"],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "QwenLM",
    "tags": ["32B", "Featured"],
    "size": 19900000000
  },
  "engine": "llama-cpp"
}

View File

@@ -49,6 +49,8 @@ const llama321bJson = require('./resources/models/llama3.2-1b-instruct/model.json')
 const llama323bJson = require('./resources/models/llama3.2-3b-instruct/model.json')
 const qwen257bJson = require('./resources/models/qwen2.5-7b-instruct/model.json')
 const qwen25coder7bJson = require('./resources/models/qwen2.5-coder-7b-instruct/model.json')
+const qwen25coder14bJson = require('./resources/models/qwen2.5-coder-14b-instruct/model.json')
+const qwen25coder32bJson = require('./resources/models/qwen2.5-coder-32b-instruct/model.json')
 const qwen2514bJson = require('./resources/models/qwen2.5-14b-instruct/model.json')
 const qwen2532bJson = require('./resources/models/qwen2.5-32b-instruct/model.json')
 const qwen2572bJson = require('./resources/models/qwen2.5-72b-instruct/model.json')
@@ -108,6 +110,8 @@ export default [
       llama323bJson,
       qwen257bJson,
       qwen25coder7bJson,
+      qwen25coder14bJson,
+      qwen25coder32bJson,
       qwen2514bJson,
       qwen2532bJson,
       qwen2572bJson,
@@ -115,6 +119,7 @@ export default [
       NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
       DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
       CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
+      CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
     }),
     // Allow json resolution
     json(),
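
The new CORTEX_SOCKET_URL constant is injected into the bundle at build time. A minimal sketch of that wiring, assuming the replace(...) call in this config is @rollup/plugin-replace (the plugin name is an assumption; the keys and values mirror the config above):

// Sketch, assuming @rollup/plugin-replace; not the full Jan rollup config.
import replace from '@rollup/plugin-replace'
import json from '@rollup/plugin-json'

export default {
  plugins: [
    replace({
      preventAssignment: true,
      // Values must be JSON.stringify'd so they land as string literals,
      // e.g. CORTEX_SOCKET_URL becomes 'ws://127.0.0.1:39291' in the bundle.
      CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
      CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
    }),
    json(), // allow json resolution
  ],
}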

View File

@@ -1,5 +1,6 @@
 declare const NODE: string
 declare const CORTEX_API_URL: string
+declare const CORTEX_SOCKET_URL: string
 declare const DEFAULT_SETTINGS: Array<any>
 declare const MODELS: Array<any>
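
These ambient declarations are what let TypeScript accept the bundler-injected globals without imports. A two-line illustration of how CORTEX_SOCKET_URL is consumed (the /events path matches the extension code below):

declare const CORTEX_SOCKET_URL: string // satisfied at build time by the replace step
const url = `${CORTEX_SOCKET_URL}/events` // bundles to 'ws://127.0.0.1:39291/events'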

View File

@@ -16,17 +16,29 @@ import {
   getJanDataFolderPath,
   extractModelLoadParams,
   fs,
+  events,
+  ModelEvent,
 } from '@janhq/core'
 import PQueue from 'p-queue'
 import ky from 'ky'

+/**
+ * Event subscription types of Downloader
+ */
+enum DownloadTypes {
+  DownloadUpdated = 'onFileDownloadUpdate',
+  DownloadError = 'onFileDownloadError',
+  DownloadSuccess = 'onFileDownloadSuccess',
+  DownloadStopped = 'onFileDownloadStopped',
+  DownloadStarted = 'onFileDownloadStarted',
+}
+
 /**
  * A class that implements the InferenceExtension interface from the @janhq/core package.
  * The class provides methods for initializing and stopping a model, and for making inference requests.
  * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
  */
 export default class JanInferenceCortexExtension extends LocalOAIEngine {
+  // DEPRECATED
   nodeModule: string = 'node'

   queue = new PQueue({ concurrency: 1 })
@@ -38,6 +50,11 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
    */
   inferenceUrl = `${CORTEX_API_URL}/v1/chat/completions`

+  /**
+   * Socket instance of events subscription
+   */
+  socket?: WebSocket = undefined
+
   /**
    * Subscribes to events emitted by the @janhq/core package.
    */
@@ -55,6 +72,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
     this.queue.add(() => this.healthz())

+    this.subscribeToEvents()
+
     window.addEventListener('beforeunload', () => {
       this.clean()
     })
@@ -138,7 +157,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
         methods: ['get'],
       },
     })
-      .then(() => {})
+      .then(() => { })
   }

 /**
@@ -154,6 +173,50 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
       // Do nothing
     })
   }

+  /**
+   * Subscribe to cortex.cpp websocket events
+   */
+  subscribeToEvents() {
+    this.queue.add(
+      () =>
+        new Promise<void>((resolve) => {
+          this.socket = new WebSocket(`${CORTEX_SOCKET_URL}/events`)
+
+          this.socket.addEventListener('message', (event) => {
+            const data = JSON.parse(event.data)
+            const transferred = data.task.items.reduce(
+              (acc: number, cur: any) => acc + cur.downloadedBytes,
+              0
+            )
+            const total = data.task.items.reduce(
+              (acc: number, cur: any) => acc + cur.bytes,
+              0
+            )
+            const percent = total > 0 ? transferred / total : 0
+
+            events.emit(DownloadTypes[data.type as keyof typeof DownloadTypes], {
+              modelId: data.task.id,
+              percent: percent,
+              size: {
+                transferred: transferred,
+                total: total,
+              },
+            })
+
+            // Update models list from Hub
+            if (data.type === DownloadTypes.DownloadSuccess) {
+              // Delay for the state update from cortex.cpp
+              // Just to be sure
+              setTimeout(() => {
+                events.emit(ModelEvent.OnModelsUpdate, {})
+              }, 500)
+            }
+          })
+
+          resolve()
+        })
+    )
+  }
 }

 /// Legacy
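
The message handler above assumes each websocket event carries a download task with per-file byte counts. A self-contained sketch of the aggregation it performs (the payload types are inferred from this handler, not a published cortex.cpp schema):

// Inferred payload shape: one task, many files, each reporting progress.
type DownloadItem = { downloadedBytes: number; bytes: number }
type DownloadTask = { id: string; items: DownloadItem[] }

// Mirrors the reduce logic in subscribeToEvents: sum bytes across all files,
// guarding against division by zero when no sizes are known yet.
function taskProgress(task: DownloadTask) {
  const transferred = task.items.reduce((acc, cur) => acc + cur.downloadedBytes, 0)
  const total = task.items.reduce((acc, cur) => acc + cur.bytes, 0)
  return { transferred, total, percent: total > 0 ? transferred / total : 0 }
}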

View File

@@ -1,6 +1,6 @@
 import PQueue from 'p-queue'
 import ky from 'ky'
-import { events, extractModelLoadParams, Model, ModelEvent } from '@janhq/core'
+import { extractModelLoadParams, Model } from '@janhq/core'
 import { extractInferenceParams } from '@janhq/core'

 /**
  * cortex.cpp Model APIs interface
@@ -24,21 +24,11 @@ type ModelList = {
   data: any[]
 }

-enum DownloadTypes {
-  DownloadUpdated = 'onFileDownloadUpdate',
-  DownloadError = 'onFileDownloadError',
-  DownloadSuccess = 'onFileDownloadSuccess',
-  DownloadStopped = 'onFileDownloadStopped',
-  DownloadStarted = 'onFileDownloadStarted',
-}
-
 export class CortexAPI implements ICortexAPI {
   queue = new PQueue({ concurrency: 1 })
-  socket?: WebSocket = undefined

   constructor() {
     this.queue.add(() => this.healthz())
-    this.subscribeToEvents()
   }

 /**
@@ -172,49 +162,6 @@ export class CortexAPI implements ICortexAPI {
       .then(() => {})
   }

-  /**
-   * Subscribe to cortex.cpp websocket events
-   */
-  subscribeToEvents() {
-    this.queue.add(
-      () =>
-        new Promise<void>((resolve) => {
-          this.socket = new WebSocket(`${SOCKET_URL}/events`)
-
-          this.socket.addEventListener('message', (event) => {
-            const data = JSON.parse(event.data)
-            const transferred = data.task.items.reduce(
-              (acc, cur) => acc + cur.downloadedBytes,
-              0
-            )
-            const total = data.task.items.reduce(
-              (acc, cur) => acc + cur.bytes,
-              0
-            )
-            const percent = total > 0 ? transferred / total : 0
-
-            events.emit(DownloadTypes[data.type], {
-              modelId: data.task.id,
-              percent: percent,
-              size: {
-                transferred: transferred,
-                total: total,
-              },
-            })
-
-            // Update models list from Hub
-            if (data.type === DownloadTypes.DownloadSuccess) {
-              // Delay for the state update from cortex.cpp
-              // Just to be sure
-              setTimeout(() => {
-                events.emit(ModelEvent.OnModelsUpdate, {})
-              }, 500)
-            }
-          })
-
-          resolve()
-        })
-    )
-  }
-
   /**
    * Transform model to the expected format (e.g. parameters, settings, metadata)
    * @param model

View File

@@ -189,7 +189,7 @@ const Advanced = () => {
    * @param gpuId
    * @returns
    */
-  const handleGPUChange = (gpuId: string) => {
+  const handleGPUChange = async (gpuId: string) => {
     let updatedGpusInUse = [...gpusInUse]
     if (updatedGpusInUse.includes(gpuId)) {
       updatedGpusInUse = updatedGpusInUse.filter((id) => id !== gpuId)
@@ -208,7 +208,7 @@ const Advanced = () => {
       updatedGpusInUse.push(gpuId)
     }
     setGpusInUse(updatedGpusInUse)
-    saveSettings({ gpusInUse: updatedGpusInUse })
+    await saveSettings({ gpusInUse: updatedGpusInUse })
     window.core?.api?.relaunch()
   }
@@ -306,8 +306,13 @@ const Advanced = () => {
           })
         }
         // Stop any running model to apply the changes
-        if (e.target.checked !== gpuEnabled)
-          stopModel().then(() => window.core?.api?.relaunch())
+        if (e.target.checked !== gpuEnabled) {
+          stopModel().finally(() => {
+            setTimeout(() => {
+              window.location.reload()
+            }, 300)
+          })
+        }
       }}
     />
 }
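
One behavioural note on the last hunk: switching from .then to .finally means the reload is scheduled whether or not stopModel() succeeds, so a failed stop no longer leaves the settings screen stale. A minimal illustration of the difference:

// .then(fn) runs fn only on fulfilment; .finally(fn) runs on both outcomes.
Promise.reject(new Error('stop failed'))
  .finally(() => console.log('reload scheduled anyway'))
  .catch(() => {}) // swallow the rejection so it is not reported as unhandled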