From 9fb95642df882c47902230f7e7b02a9e7cc7d8f4 Mon Sep 17 00:00:00 2001 From: Hoang Ha <64120343+hahuyhoang411@users.noreply.github.com> Date: Mon, 1 Jul 2024 18:36:08 +0700 Subject: [PATCH] Chore: Add stop token for Gemma 2b (#3125) * add stop token * Bump version --- extensions/inference-nitro-extension/package.json | 2 +- .../resources/models/gemma-2b/model.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json index 50294b0ab..e317277f9 100644 --- a/extensions/inference-nitro-extension/package.json +++ b/extensions/inference-nitro-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-cortex-extension", "productName": "Cortex Inference Engine", - "version": "1.0.13", + "version": "1.0.14", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2b/model.json b/extensions/inference-nitro-extension/resources/models/gemma-2b/model.json index e5ee3c239..68cff325a 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-2b/model.json +++ b/extensions/inference-nitro-extension/resources/models/gemma-2b/model.json @@ -8,7 +8,7 @@ "id": "gemma-2b", "object": "model", "name": "Gemma 2B Q4", - "version": "1.2", + "version": "1.3", "description": "Gemma is built from the same technology with Google's Gemini.", "format": "gguf", "settings": { @@ -22,7 +22,7 @@ "top_p": 0.95, "stream": true, "max_tokens": 8192, - "stop": [], + "stop": [""], "frequency_penalty": 0, "presence_penalty": 0 },