chore: remote engine error handling (#4646)

* chore: Gemini error handling

* chore: remote provider error handling

* chore: remote provider error handling

* chore: fix anthropic unsupported parameters

* chore: fix tests
Louis 2025-02-13 18:32:33 +07:00 committed by GitHub
parent 0c0b7e5fcc
commit 7a6890bd7f
15 changed files with 185 additions and 130 deletions

View File

@@ -38,8 +38,14 @@ describe('OAIEngine', () => {
   it('should subscribe to events on load', () => {
     engine.onLoad()
-    expect(events.on).toHaveBeenCalledWith(MessageEvent.OnMessageSent, expect.any(Function))
-    expect(events.on).toHaveBeenCalledWith(InferenceEvent.OnInferenceStopped, expect.any(Function))
+    expect(events.on).toHaveBeenCalledWith(
+      MessageEvent.OnMessageSent,
+      expect.any(Function)
+    )
+    expect(events.on).toHaveBeenCalledWith(
+      InferenceEvent.OnInferenceStopped,
+      expect.any(Function)
+    )
   })

   it('should handle inference request', async () => {
@@ -77,7 +83,12 @@ describe('OAIEngine', () => {
     expect(events.emit).toHaveBeenCalledWith(
       MessageEvent.OnMessageUpdate,
       expect.objectContaining({
-        content: [{ type: ContentType.Text, text: { value: 'test response', annotations: [] } }],
+        content: [
+          {
+            type: ContentType.Text,
+            text: { value: 'test response', annotations: [] },
+          },
+        ],
         status: MessageStatus.Ready,
       })
     )
@@ -101,11 +112,10 @@ describe('OAIEngine', () => {
     await engine.inference(data)

-    expect(events.emit).toHaveBeenCalledWith(
+    expect(events.emit).toHaveBeenLastCalledWith(
       MessageEvent.OnMessageUpdate,
       expect.objectContaining({
-        content: [{ type: ContentType.Text, text: { value: 'test error', annotations: [] } }],
-        status: MessageStatus.Error,
+        status: 'error',
         error_code: 500,
       })
     )

View File

@@ -42,7 +42,9 @@ export abstract class OAIEngine extends AIEngine {
   */
  override onLoad() {
    super.onLoad()
-    events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => this.inference(data))
+    events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
+      this.inference(data)
+    )
    events.on(InferenceEvent.OnInferenceStopped, () => this.stopInference())
  }
@@ -128,7 +130,9 @@ export abstract class OAIEngine extends AIEngine {
        events.emit(MessageEvent.OnMessageUpdate, message)
      },
      complete: async () => {
-        message.status = message.content.length ? MessageStatus.Ready : MessageStatus.Error
+        message.status = message.content.length
+          ? MessageStatus.Ready
+          : MessageStatus.Error
        events.emit(MessageEvent.OnMessageUpdate, message)
      },
      error: async (err: any) => {
@@ -141,7 +145,10 @@ export abstract class OAIEngine extends AIEngine {
        message.content[0] = {
          type: ContentType.Text,
          text: {
-            value: err.message,
+            value:
+              typeof message === 'string'
+                ? err.message
+                : (JSON.stringify(err.message) ?? err.detail),
            annotations: [],
          },
        }

View File

@@ -1,14 +1,17 @@
 import { lastValueFrom, Observable } from 'rxjs'
 import { requestInference } from './sse'
-import { ReadableStream } from 'stream/web';
+import { ReadableStream } from 'stream/web'

 describe('requestInference', () => {
   it('should send a request to the inference server and return an Observable', () => {
     // Mock the fetch function
     const mockFetch: any = jest.fn(() =>
       Promise.resolve({
         ok: true,
-        json: () => Promise.resolve({ choices: [{ message: { content: 'Generated response' } }] }),
+        json: () =>
+          Promise.resolve({
+            choices: [{ message: { content: 'Generated response' } }],
+          }),
         headers: new Headers(),
         redirected: false,
         status: 200,
@@ -36,7 +39,10 @@ describe('requestInference', () => {
     const mockFetch: any = jest.fn(() =>
       Promise.resolve({
         ok: false,
-        json: () => Promise.resolve({ error: { message: 'Wrong API Key', code: 'invalid_api_key' } }),
+        json: () =>
+          Promise.resolve({
+            error: { message: 'Invalid API Key.', code: 'invalid_api_key' },
+          }),
         headers: new Headers(),
         redirected: false,
         status: 401,
@@ -56,7 +62,10 @@ describe('requestInference', () => {
     // Assert the expected behavior
     expect(result).toBeInstanceOf(Observable)
-    expect(lastValueFrom(result)).rejects.toEqual({ message: 'Wrong API Key', code: 'invalid_api_key' })
+    expect(lastValueFrom(result)).rejects.toEqual({
+      message: 'Invalid API Key.',
+      code: 'invalid_api_key',
+    })
   })
 })
@@ -65,7 +74,10 @@ describe('requestInference', () => {
     const mockFetch: any = jest.fn(() =>
       Promise.resolve({
         ok: true,
-        json: () => Promise.resolve({ choices: [{ message: { content: 'Generated response' } }] }),
+        json: () =>
+          Promise.resolve({
+            choices: [{ message: { content: 'Generated response' } }],
+          }),
         headers: new Headers(),
         redirected: false,
         status: 200,
@@ -78,17 +90,24 @@ describe('requestInference', () => {
     const inferenceUrl = 'https://inference-server.com'
     const requestBody = { message: 'Hello' }
     const model = { id: 'model-id', parameters: { stream: false } }
-    const transformResponse = (data: any) => data.choices[0].message.content.toUpperCase()
+    const transformResponse = (data: any) =>
+      data.choices[0].message.content.toUpperCase()

     // Call the function
-    const result = requestInference(inferenceUrl, requestBody, model, undefined, undefined, transformResponse)
+    const result = requestInference(
+      inferenceUrl,
+      requestBody,
+      model,
+      undefined,
+      undefined,
+      transformResponse
+    )

     // Assert the expected behavior
     expect(result).toBeInstanceOf(Observable)
     expect(lastValueFrom(result)).resolves.toEqual('GENERATED RESPONSE')
   })

   it('should handle a successful response with streaming enabled', () => {
     // Mock the fetch function
     const mockFetch: any = jest.fn(() =>
@@ -96,29 +115,32 @@ describe('requestInference', () => {
         ok: true,
         body: new ReadableStream({
           start(controller) {
-            controller.enqueue(new TextEncoder().encode('data: {"choices": [{"delta": {"content": "Streamed"}}]}'));
-            controller.enqueue(new TextEncoder().encode('data: [DONE]'));
-            controller.close();
-          }
+            controller.enqueue(
+              new TextEncoder().encode(
+                'data: {"choices": [{"delta": {"content": "Streamed"}}]}'
+              )
+            )
+            controller.enqueue(new TextEncoder().encode('data: [DONE]'))
+            controller.close()
+          },
         }),
         headers: new Headers(),
         redirected: false,
         status: 200,
         statusText: 'OK',
       })
-    );
-    jest.spyOn(global, 'fetch').mockImplementation(mockFetch);
+    )
+    jest.spyOn(global, 'fetch').mockImplementation(mockFetch)

     // Define the test inputs
-    const inferenceUrl = 'https://inference-server.com';
-    const requestBody = { message: 'Hello' };
-    const model = { id: 'model-id', parameters: { stream: true } };
+    const inferenceUrl = 'https://inference-server.com'
+    const requestBody = { message: 'Hello' }
+    const model = { id: 'model-id', parameters: { stream: true } }

     // Call the function
-    const result = requestInference(inferenceUrl, requestBody, model);
+    const result = requestInference(inferenceUrl, requestBody, model)

     // Assert the expected behavior
-    expect(result).toBeInstanceOf(Observable);
-    expect(lastValueFrom(result)).resolves.toEqual('Streamed');
-  });
+    expect(result).toBeInstanceOf(Observable)
+    expect(lastValueFrom(result)).resolves.toEqual('Streamed')
+  })

View File

@@ -32,21 +32,20 @@ export function requestInference(
      })
      .then(async (response) => {
        if (!response.ok) {
-          const data = await response.json()
-          let errorCode = ErrorCode.Unknown
-          if (data.error) {
-            errorCode = data.error.code ?? data.error.type ?? ErrorCode.Unknown
-          } else if (response.status === 401) {
-            errorCode = ErrorCode.InvalidApiKey
-          }
-          const error = {
-            message: data.error?.message ?? data.message ?? 'Error occurred.',
-            code: errorCode,
-          }
-          subscriber.error(error)
-          subscriber.complete()
-          return
-        }
+          if (response.status === 401) {
+            throw {
+              code: ErrorCode.InvalidApiKey,
+              message: 'Invalid API Key.',
+            }
+          }
+          let data = await response.json()
+          try {
+            handleError(data)
+          } catch (err) {
+            subscriber.error(err)
+            return
+          }
+        }
        // There could be overriden stream parameter in the model
        // that is set in request body (transformed payload)
        if (
@@ -54,9 +53,10 @@ export function requestInference(
          model.parameters?.stream === false
        ) {
          const data = await response.json()
-          if (data.error || data.message) {
-            subscriber.error(data.error ?? data)
-            subscriber.complete()
+          try {
+            handleError(data)
+          } catch (err) {
+            subscriber.error(err)
            return
          }
          if (transformResponse) {
@@ -91,13 +91,10 @@ export function requestInference(
                const toParse = cachedLines + line
                if (!line.includes('data: [DONE]')) {
                  const data = JSON.parse(toParse.replace('data: ', ''))
-                  if (
-                    'error' in data ||
-                    'message' in data ||
-                    'detail' in data
-                  ) {
-                    subscriber.error(data.error ?? data)
-                    subscriber.complete()
+                  try {
+                    handleError(data)
+                  } catch (err) {
+                    subscriber.error(err)
                    return
                  }
                  content += data.choices[0]?.delta?.content ?? ''
@@ -118,3 +115,18 @@ export function requestInference(
      .catch((err) => subscriber.error(err))
  })
}
+
+/**
+ * Handle error and normalize it to a common format.
+ * @param data
+ */
+const handleError = (data: any) => {
+  if (
+    data.error ||
+    data.message ||
+    data.detail ||
+    (Array.isArray(data) && data.length && data[0].error)
+  ) {
+    throw data.error ?? data[0]?.error ?? data
+  }
+}

View File

@@ -150,6 +150,7 @@ export type ModelSettingParams = {
 */
export type ModelRuntimeParams = {
  temperature?: number
+  max_temperature?: number
  token_limit?: number
  top_k?: number
  top_p?: number

View File

@@ -8,6 +8,7 @@
    "inference_params": {
      "max_tokens": 4096,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "stream": true
    },
    "engine": "anthropic"
@@ -21,6 +22,7 @@
    "inference_params": {
      "max_tokens": 8192,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "stream": true
    },
    "engine": "anthropic"
@@ -34,6 +36,7 @@
    "inference_params": {
      "max_tokens": 8192,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "stream": true
    },
    "engine": "anthropic"

View File

@@ -8,6 +8,7 @@
    "inference_params": {
      "max_tokens": 4096,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "stream": false
    },
    "engine": "cohere"
@@ -21,6 +22,7 @@
    "inference_params": {
      "max_tokens": 4096,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "stream": false
    },
    "engine": "cohere"

View File

@@ -8,6 +8,7 @@
    "inference_params": {
      "max_tokens": 32000,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "top_p": 0.95,
      "stream": true
    },
@@ -22,6 +23,7 @@
    "inference_params": {
      "max_tokens": 32000,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "top_p": 0.95,
      "stream": true
    },
@@ -36,6 +38,7 @@
    "inference_params": {
      "max_tokens": 32000,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "top_p": 0.95,
      "stream": true
    },

View File

@@ -8,6 +8,7 @@
    "inference_params": {
      "max_tokens": 1024,
      "temperature": 0.3,
+      "max_temperature": 1.0,
      "top_p": 1,
      "stream": false,
      "frequency_penalty": 0,

View File

@@ -79,11 +79,7 @@
      "description": "OpenAI o1 is a new model with complex reasoning",
      "format": "api",
      "inference_params": {
-        "max_tokens": 100000,
-        "temperature": 1,
-        "top_p": 1,
-        "frequency_penalty": 0,
-        "presence_penalty": 0
+        "max_tokens": 100000
      },
      "engine": "openai"
    },
@@ -96,11 +92,7 @@
      "format": "api",
      "inference_params": {
        "max_tokens": 32768,
-        "temperature": 1,
-        "top_p": 1,
-        "stream": true,
-        "frequency_penalty": 0,
-        "presence_penalty": 0
+        "stream": true
      },
      "engine": "openai"
    },
@@ -113,11 +105,7 @@
      "format": "api",
      "inference_params": {
        "max_tokens": 65536,
-        "temperature": 1,
-        "top_p": 1,
-        "stream": true,
-        "frequency_penalty": 0,
-        "presence_penalty": 0
+        "stream": true
      },
      "engine": "openai"
    }

View File

@@ -10,7 +10,7 @@
    "transform_req": {
      "chat_completions": {
        "url": "https://api.anthropic.com/v1/messages",
-        "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": \"{{ input_request.messages.0.content }}\", \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": \"{{ message.role}}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
+        "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": {{ tojson(input_request.messages.0.content) }}, \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"metadata\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
      }
    },
    "transform_resp": {

View File

@@ -199,7 +199,7 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
      .post(`${API_URL}/v1/models/add`, {
        json: {
          inference_params: {
-            max_tokens: 8192,
+            max_tokens: 4096,
            temperature: 0.7,
            top_p: 0.95,
            stream: true,

View File

@@ -1 +1 @@
-1.0.10-rc6
+1.0.10-rc7

View File

@@ -10,7 +10,9 @@ const AutoLink = ({ text }: Props) => {
  return (
    <>
-      {text.split(delimiter).map((word) => {
+      {text &&
+        typeof text === 'string' &&
+        text.split(delimiter).map((word) => {
          const match = word.match(delimiter)
          if (match) {
            const url = match[0]

View File

@@ -27,6 +27,10 @@ export const getConfigurationsData = (
        componentSetting.controllerProps.max ||
        4096
      break
+    case 'temperature':
+      componentSetting.controllerProps.max =
+        selectedModel?.parameters?.max_temperature || 2
+      break
    case 'ctx_len':
      componentSetting.controllerProps.max =
        selectedModel?.settings.ctx_len ||