diff --git a/core/src/browser/extensions/engines/helpers/sse.ts b/core/src/browser/extensions/engines/helpers/sse.ts
index 9713256b3..bd9945d3c 100644
--- a/core/src/browser/extensions/engines/helpers/sse.ts
+++ b/core/src/browser/extensions/engines/helpers/sse.ts
@@ -45,7 +45,9 @@ export function requestInference(
         subscriber.complete()
         return
       }
-      if (model.parameters?.stream === false) {
+      // There could be an overridden stream parameter in the model
+      // that is set in the request body (transformed payload)
+      if (requestBody?.stream === false || model.parameters?.stream === false) {
         const data = await response.json()
         if (transformResponse) {
           subscriber.next(transformResponse(data))
diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index fc7e250ab..174ce7923 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -69,12 +69,13 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
 
     super.onLoad()
 
-    await this.queue.add(() => this.clean())
-    this.queue.add(() => this.healthz())
-    this.queue.add(() => this.setDefaultEngine(systemInfo))
+    this.queue.add(() => this.clean())
+    // Run the process watchdog
     const systemInfo = await systemInformation()
-    await executeOnMain(NODE, 'run', systemInfo)
+    this.queue.add(() => executeOnMain(NODE, 'run', systemInfo))
+    this.queue.add(() => this.healthz())
+    this.queue.add(() => this.setDefaultEngine(systemInfo))
 
     this.subscribeToEvents()
 
     window.addEventListener('beforeunload', () => {
diff --git a/extensions/inference-openai-extension/resources/models.json b/extensions/inference-openai-extension/resources/models.json
index 124e123b9..3f41c0a7d 100644
--- a/extensions/inference-openai-extension/resources/models.json
+++ b/extensions/inference-openai-extension/resources/models.json
@@ -97,11 +97,9 @@
     "format": "api",
     "settings": {},
     "parameters": {
-      "max_tokens": 4096,
-      "temperature": 0.7,
-      "top_p": 0.95,
-      "stream": true,
-      "stop": [],
+      "temperature": 1,
+      "top_p": 1,
+      "max_tokens": 32768,
       "frequency_penalty": 0,
       "presence_penalty": 0
     },
@@ -125,11 +123,9 @@
     "format": "api",
     "settings": {},
     "parameters": {
-      "max_tokens": 4096,
-      "temperature": 0.7,
-      "top_p": 0.95,
-      "stream": true,
-      "stop": [],
+      "temperature": 1,
+      "top_p": 1,
+      "max_tokens": 65536,
       "frequency_penalty": 0,
       "presence_penalty": 0
     },
diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts
index 64880b678..d484c8ae5 100644
--- a/extensions/inference-openai-extension/src/index.ts
+++ b/extensions/inference-openai-extension/src/index.ts
@@ -76,11 +76,11 @@ export default class JanInferenceOpenAIExtension extends RemoteOAIEngine {
   transformPayload = (payload: OpenAIPayloadType): OpenAIPayloadType => {
     // Transform the payload for preview models
     if (this.previewModels.includes(payload.model)) {
-      const { max_tokens, temperature, top_p, stop, ...params } = payload
+      const { max_tokens, stop, ...params } = payload
       return {
         ...params,
         max_completion_tokens: max_tokens,
-        stream: false // o1 only support stream = false
+        stream: false, // o1 only supports stream = false
       }
     }
     // Pass through for non-preview models
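Note on the `transformPayload` change above: preview (o1-class) models now keep the caller's `temperature` and `top_p`; only `max_tokens` is remapped to `max_completion_tokens`, `stop` is dropped, and streaming is forced off. A minimal sketch of the resulting behavior, assuming a simplified payload shape and example preview-model ids (the extension defines the real `OpenAIPayloadType` and `previewModels` list):

```ts
// Sketch only: simplified payload shape and assumed preview-model ids.
interface OpenAIPayloadType {
  model: string
  max_tokens?: number
  max_completion_tokens?: number
  stop?: string[]
  stream?: boolean
  temperature?: number
  top_p?: number
}

const previewModels = ['o1-mini', 'o1-preview'] // assumption for illustration

function transformPayload(payload: OpenAIPayloadType): OpenAIPayloadType {
  if (previewModels.includes(payload.model)) {
    // Drop fields o1 rejects, remap max_tokens, and disable streaming;
    // temperature and top_p now pass through untouched via ...params.
    const { max_tokens, stop, ...params } = payload
    return {
      ...params,
      max_completion_tokens: max_tokens,
      stream: false,
    }
  }
  // Pass through for non-preview models
  return payload
}
```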
"productName": "Model Management", - "version": "1.0.34", + "version": "1.0.35", "description": "Model Management Extension provides model exploration and seamless downloads", "main": "dist/index.js", "author": "Jan ", diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts index f1ce069f6..4ebc56d54 100644 --- a/extensions/model-extension/src/index.ts +++ b/extensions/model-extension/src/index.ts @@ -143,7 +143,10 @@ export default class JanModelExtension extends ModelExtension { * There is no model to import * just return fetched models */ - if (!toImportModels.length) return fetchedModels + if (!toImportModels.length) + return fetchedModels.concat( + legacyModels.filter((e) => e.settings?.vision_model) + ) console.log('To import models:', toImportModels.length) /**