Chore: aya update (#2941)

* init * init * fix: correct format * version bump * add: aya 8b, aya 35b, phi3 * fix: stop token * fix: stop token
2024-05-24 18:10:23 +07:00 · 2024-05-24 18:10:23 +07:00 · 25daba9696
commit 25daba9696
parent 20c9c3ff2b
4 changed files with 78 additions and 2 deletions
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-nitro-extension/package.json
@ -1,7 +1,7 @@
 {
  "name": "@janhq/inference-cortex-extension",
  "productName": "Cortex Inference Engine",
-  "version": "1.0.9",
+  "version": "1.0.10",
  "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
  "main": "dist/index.js",
  "node": "dist/node/index.cjs.js",
--- a/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json
@ -0,0 +1,35 @@
 {
    "sources": [
      {
        "filename": "aya-23-35B-Q4_K_M.gguf",
        "url": "https://huggingface.co/bartowski/aya-23-35B-GGUF/resolve/main/aya-23-35B-Q4_K_M.gguf"
      }
    ],
    "id": "aya-23-35b",
    "object": "model",
    "name": "Aya 23 35B Q4",
    "version": "1.0",
    "description": "Aya 23 can converse fluently in up to 23 languages.",
    "format": "gguf",
    "settings": {
      "ctx_len": 8192,
      "prompt_template": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{system_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
      "llama_model_path": "aya-23-35B-Q4_K_M.gguf",
      "ngl": 40
    },
    "parameters": {
      "temperature": 0.7,
      "top_p": 0.95,
      "stream": true,
      "max_tokens": 8192,
      "frequency_penalty": 0,
      "presence_penalty": 0,
      "stop": ["<|END_OF_TURN_TOKEN|>"]
    },
    "metadata": {
      "author": "CohereForAI",
      "tags": ["35B", "Finetuned"],
      "size": 21556982144
    },
    "engine": "nitro"
 }
--- a/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json
@ -0,0 +1,35 @@
 {
    "sources": [
      {
        "filename": "aya-23-8B-Q4_K_M.gguf",
        "url": "https://huggingface.co/bartowski/aya-23-8B-GGUF/resolve/main/aya-23-8B-Q4_K_M.gguf"
      }
    ],
    "id": "aya-23-8b",
    "object": "model",
    "name": "Aya 23 8B Q4",
    "version": "1.0",
    "description": "Aya 23 can converse fluently in up to 23 languages.",
    "format": "gguf",
    "settings": {
      "ctx_len": 8192,
      "prompt_template": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{system_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
      "llama_model_path": "aya-23-8B-Q4_K_M.gguf",
      "ngl": 32
    },
    "parameters": {
      "temperature": 0.7,
      "top_p": 0.95,
      "stream": true,
      "max_tokens": 8192,
      "frequency_penalty": 0,
      "presence_penalty": 0,
      "stop": ["<|END_OF_TURN_TOKEN|>"]
    },
    "metadata": {
      "author": "CohereForAI",
      "tags": ["8B", "Finetuned", "Featured"],
      "size": 5056982144
    },
    "engine": "nitro"
 }
--- a/extensions/inference-nitro-extension/rollup.config.ts
+++ b/extensions/inference-nitro-extension/rollup.config.ts
@ -35,6 +35,9 @@ const wizardcoder13bJson = require('./resources/models/wizardcoder-13b/model.jso
 const yi34bJson = require('./resources/models/yi-34b/model.json')
 const llama3Json = require('./resources/models/llama3-8b-instruct/model.json')
 const llama3Hermes8bJson = require('./resources/models/llama3-hermes-8b/model.json')
 const aya8bJson = require('./resources/models/aya-23-8b/model.json')
 const aya35bJson = require('./resources/models/aya-23-35b/model.json')
 const phimediumJson = require('./resources/models/phi3-medium/model.json')
 export default [
  {
@ -76,7 +79,10 @@ export default [
          wizardcoder13bJson,
          yi34bJson,
          llama3Json,
-          llama3Hermes8bJson
+          llama3Hermes8bJson,
          phimediumJson,
          aya8bJson,
          aya35bJson
        ]),
        NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
        DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),