* fix: correct context shift flag handling in LlamaCPP extension

  The previous implementation added the `--no-context-shift` flag when `cfg.ctx_shift` was disabled, which conflicted with the llama.cpp CLI, where the presence of `--context-shift` enables the feature. The logic is updated to push `--context-shift` only when `cfg.ctx_shift` is true, ensuring the extension passes the correct argument and behaves as expected.

* feat: detect model out of context during generation

---------

Co-authored-by: Dinh Long Nguyen <dinhlongviolin1@gmail.com>
commit 654e566dcb
parent ad428f587b
@@ -42,6 +42,9 @@ import {
 } from '@janhq/tauri-plugin-llamacpp-api'
 import { getSystemUsage, getSystemInfo } from '@janhq/tauri-plugin-hardware-api'

+// Error message constant - matches web-app/src/utils/error.ts
+const OUT_OF_CONTEXT_SIZE = 'the request exceeds the available context size.'
+
 type LlamacppConfig = {
   version_backend: string
   auto_update_engine: boolean
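A minimal sketch of how a consumer might recognize this error on its side, assuming the web app compares against the same string; `isOutOfContextError` is a hypothetical helper and not part of this diff.

```ts
// Hypothetical consumer-side check: the extension throws Error(OUT_OF_CONTEXT_SIZE),
// so callers can recognize the condition by matching the message text.
const OUT_OF_CONTEXT_SIZE = 'the request exceeds the available context size.'

function isOutOfContextError(err: unknown): boolean {
  return err instanceof Error && err.message.includes(OUT_OF_CONTEXT_SIZE)
}
```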
@@ -1541,7 +1544,7 @@ export default class llamacpp_extension extends AIEngine {
       args.push('--main-gpu', String(cfg.main_gpu))

     // Boolean flags
-    if (!cfg.ctx_shift) args.push('--no-context-shift')
+    if (cfg.ctx_shift) args.push('--context-shift')
     if (Number(version.replace(/^b/, '')) >= 6325) {
       if (!cfg.flash_attn) args.push('--flash-attn', 'off') //default: auto = ON when supported
     } else {
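A self-contained sketch of the corrected flag behaviour, assuming only that `cfg.ctx_shift` is a boolean; the `contextShiftArgs` helper is illustrative, while the real code pushes onto the shared `args` array shown above.

```ts
// Illustrative helper: the flag is pushed only when context shift is enabled,
// and omitted entirely when it is disabled (no --no-context-shift anymore).
type Cfg = { ctx_shift: boolean }

function contextShiftArgs(cfg: Cfg): string[] {
  const args: string[] = []
  if (cfg.ctx_shift) args.push('--context-shift')
  return args
}

// contextShiftArgs({ ctx_shift: true })  -> ['--context-shift']
// contextShiftArgs({ ctx_shift: false }) -> []
```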
@@ -1739,6 +1742,13 @@ export default class llamacpp_extension extends AIEngine {
         try {
           const data = JSON.parse(jsonStr)
           const chunk = data as chatCompletionChunk
+
+          // Check for out-of-context error conditions
+          if (chunk.choices?.[0]?.finish_reason === 'length') {
+            // finish_reason 'length' indicates context limit was hit
+            throw new Error(OUT_OF_CONTEXT_SIZE)
+          }
+
           yield chunk
         } catch (e) {
           logger.error('Error parsing JSON from stream or server error:', e)
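A sketch of how a caller might consume the streaming generator and surface this error, assuming the surrounding method is an async generator of chunks; the simplified chunk type and the `collect` helper are assumptions for illustration only.

```ts
// Simplified chunk shape (assumption; the real chatCompletionChunk type lives in the extension).
type Chunk = {
  choices?: { delta?: { content?: string }; finish_reason?: string | null }[]
}

async function collect(stream: AsyncIterable<Chunk>): Promise<string> {
  let text = ''
  try {
    for await (const chunk of stream) {
      text += chunk.choices?.[0]?.delta?.content ?? ''
    }
  } catch (e) {
    // The throw inside the generator aborts iteration, so the out-of-context
    // error propagates to the consumer here.
    if (e instanceof Error && e.message.includes('exceeds the available context size')) {
      // handle or report the out-of-context condition
    }
    throw e
  }
  return text
}
```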
@@ -1817,7 +1827,15 @@ export default class llamacpp_extension extends AIEngine {
       )
     }

-    return (await response.json()) as chatCompletion
+    const completionResponse = (await response.json()) as chatCompletion
+
+    // Check for out-of-context error conditions
+    if (completionResponse.choices?.[0]?.finish_reason === 'length') {
+      // finish_reason 'length' indicates context limit was hit
+      throw new Error(OUT_OF_CONTEXT_SIZE)
+    }
+
+    return completionResponse
   }

   override async delete(modelId: string): Promise<void> {
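The non-streaming path applies the same guard before returning. A minimal standalone version of that check, with a simplified completion type as an assumption:

```ts
// Illustrative guard mirroring the check above; 'length' means generation
// stopped because the context limit was hit.
type Completion = { choices?: { finish_reason?: string | null }[] }

function assertWithinContext(completion: Completion): Completion {
  if (completion.choices?.[0]?.finish_reason === 'length') {
    throw new Error('the request exceeds the available context size.')
  }
  return completion
}
```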