From 01d3fb2f4952158ad92ed58ce3f979e1839da067 Mon Sep 17 00:00:00 2001 From: tikikun Date: Fri, 1 Dec 2023 14:05:33 +0700 Subject: [PATCH 01/13] docs: add engine --- docs/docs/specs/engineering/engine.md | 6 ++++++ docs/sidebars.js | 1 + 2 files changed, 7 insertions(+) create mode 100644 docs/docs/specs/engineering/engine.md diff --git a/docs/docs/specs/engineering/engine.md b/docs/docs/specs/engineering/engine.md new file mode 100644 index 000000000..13db10bf6 --- /dev/null +++ b/docs/docs/specs/engineering/engine.md @@ -0,0 +1,6 @@ +--- +title: Engine +slug: /specs/engine +--- + +# Hello world diff --git a/docs/sidebars.js b/docs/sidebars.js index edef458cd..384f47e9d 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -81,6 +81,7 @@ const sidebars = { items: [ "specs/engineering/chats", "specs/engineering/models", + "specs/engineering/engine", "specs/engineering/threads", "specs/engineering/messages", "specs/engineering/assistants", From 5403d9a8e5d9f1c4e2217b14f3a4c1af2351d07f Mon Sep 17 00:00:00 2001 From: tikikun Date: Fri, 1 Dec 2023 14:39:00 +0700 Subject: [PATCH 02/13] docs: change to correct naming of engine and model params --- docs/docs/specs/engineering/models.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/docs/specs/engineering/models.md b/docs/docs/specs/engineering/models.md index c47a62bab..283c8e37e 100644 --- a/docs/docs/specs/engineering/models.md +++ b/docs/docs/specs/engineering/models.md @@ -64,13 +64,13 @@ Here's a standard example `model.json` for a GGUF model. "description": null, // Defaults to null "state": enum[null, "downloading", "ready", "starting", "stopping", ...] 
"format": "ggufv3", // Defaults to "ggufv3" -"settings": { // Models are initialized with settings +"engine_parameters": { // Engine "ctx_len": 2048, "ngl": 100, "embedding": true, "n_parallel": 4, }, -"parameters": { // Models are called parameters +"model_parameters": { // Models are called parameters "stream": true, "max_tokens": 2048, "stop": [""], // This usually can be left blank, only used with specific need from model author From 7a64b453653ba6e335210e09ed843041ce439fd3 Mon Sep 17 00:00:00 2001 From: tikikun Date: Fri, 1 Dec 2023 17:20:07 +0700 Subject: [PATCH 03/13] docs: add docs for engine, makeup for models docs --- docs/docs/specs/engineering/engine.md | 57 ++++++++++++++++++++++++++- docs/docs/specs/engineering/models.md | 2 +- 2 files changed, 56 insertions(+), 3 deletions(-) diff --git a/docs/docs/specs/engineering/engine.md b/docs/docs/specs/engineering/engine.md index 13db10bf6..c0db37e4d 100644 --- a/docs/docs/specs/engineering/engine.md +++ b/docs/docs/specs/engineering/engine.md @@ -1,6 +1,59 @@ --- -title: Engine +title: Engine Specifications slug: /specs/engine --- -# Hello world +:::caution + +Currently Under Development + +::: + +## Overview + +In the Jan application, engines serve as primary entities with the following capabilities: + +- Ability to add or remove engines via the installation of extensions. +- Dependence on engines for conducting [inference](https://en.wikipedia.org/wiki/Inference_engine). + +## Folder Structure + +- Default parameters for engines are stored in JSON files located in the `/engines` folder. +- These parameter files are named uniquely with `engine_id`. +- Engines are referenced directly using `engine_id` in the `model.json` file. + +```yaml +jan/ + engines/ + nitro.json + openai.json + ..... +``` + +## Engine Default Parameter Files + +- Each inference engine requires default parameters to function in cases where user-provided parameters are absent. 
+- These parameters are stored in JSON files, structured as simple key-value pairs. + +### Example + +Here is an example of an engine file for `engine_id` `nitro`: + +```js +{ + "llama_model_path": "/model/llama-2-7b-model.gguf", + "ctx_len": 512, + "ngl": 100, + "embedding": false, + "n_parallel": 1, + "cont_batching": false +} +``` + +For detailed engine parameters, refer to: [Nitro's Model Settings](https://nitro.jan.ai/features/load-unload#table-of-parameters) + +## Adding an Engine + +- Engine parameter files are automatically generated upon installing an `inference-extension` in the Jan application. + +--- diff --git a/docs/docs/specs/engineering/models.md b/docs/docs/specs/engineering/models.md index 283c8e37e..a977ce1d4 100644 --- a/docs/docs/specs/engineering/models.md +++ b/docs/docs/specs/engineering/models.md @@ -64,7 +64,7 @@ Here's a standard example `model.json` for a GGUF model. "description": null, // Defaults to null "state": enum[null, "downloading", "ready", "starting", "stopping", ...] 
"format": "ggufv3", // Defaults to "ggufv3" -"engine_parameters": { // Engine +"engine_parameters": { // Engine parameters inside model.json can override the value inside the base engine.json "ctx_len": 2048, "ngl": 100, "embedding": true, From bb6cb1396e986acc795be3d03ecccd768b091e50 Mon Sep 17 00:00:00 2001 From: tikikun Date: Fri, 1 Dec 2023 17:20:25 +0700 Subject: [PATCH 04/13] rename title --- docs/docs/specs/engineering/engine.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/specs/engineering/engine.md b/docs/docs/specs/engineering/engine.md index c0db37e4d..17e01ebd0 100644 --- a/docs/docs/specs/engineering/engine.md +++ b/docs/docs/specs/engineering/engine.md @@ -1,5 +1,5 @@ --- -title: Engine Specifications +title: Engine slug: /specs/engine --- From 97fcafe61b4b7a1a0874cb7d033b5b68dfd522b1 Mon Sep 17 00:00:00 2001 From: tikikun Date: Fri, 1 Dec 2023 17:24:54 +0700 Subject: [PATCH 05/13] docs: rewrite overview --- docs/docs/specs/engineering/engine.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/docs/specs/engineering/engine.md b/docs/docs/specs/engineering/engine.md index 17e01ebd0..a416bc820 100644 --- a/docs/docs/specs/engineering/engine.md +++ b/docs/docs/specs/engineering/engine.md @@ -13,8 +13,9 @@ Currently Under Development In the Jan application, engines serve as primary entities with the following capabilities: -- Ability to add or remove engines via the installation of extensions. -- Dependence on engines for conducting [inference](https://en.wikipedia.org/wiki/Inference_engine). +- Engine will be installed through `inference-extensions`. +- Models will depend on engines to do [inference](https://en.wikipedia.org/wiki/Inference_engine). +- Engine configuration and required metadata will be stored in a json file. 
## Folder Structure From 242204df244fb171a54472023e49d7580aa882d5 Mon Sep 17 00:00:00 2001 From: tikikun Date: Fri, 1 Dec 2023 17:25:52 +0700 Subject: [PATCH 06/13] update models.md --- docs/docs/specs/engineering/models.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/docs/specs/engineering/models.md b/docs/docs/specs/engineering/models.md index a977ce1d4..517b9fefe 100644 --- a/docs/docs/specs/engineering/models.md +++ b/docs/docs/specs/engineering/models.md @@ -51,9 +51,9 @@ jan/ # Jan root folder Here's a standard example `model.json` for a GGUF model. -- `source_url`: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/. ```js +{ "id": "zephyr-7b", // Defaults to foldername "object": "model", // Defaults to "model" "source_url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf", @@ -64,8 +64,8 @@ Here's a standard example `model.json` for a GGUF model. "description": null, // Defaults to null "state": enum[null, "downloading", "ready", "starting", "stopping", ...] "format": "ggufv3", // Defaults to "ggufv3" -"engine_parameters": { // Engine parameters inside model.json can override the value inside the base engine.json - "ctx_len": 2048, +"engine_parameters": { // Engine parameters inside model.json can override + "ctx_len": 2048, // the value inside the base engine.json "ngl": 100, "embedding": true, "n_parallel": 4, @@ -83,9 +83,10 @@ Here's a standard example `model.json` for a GGUF model. 
"assets": [ // Defaults to current dir "file://.../zephyr-7b-q4_k_m.bin", ] +} ``` -The model settings in the example can be found at: [Nitro's model settings](https://nitro.jan.ai/features/load-unload#table-of-parameters) +The engine parameters in the example can be found at: [Nitro's model settings](https://nitro.jan.ai/features/load-unload#table-of-parameters) The model parameters in the example can be found at: [Nitro's model parameters](https://nitro.jan.ai/api-reference#tag/Chat-Completion) From bd185c633af82c57855e6a1ee40714d164ff40d8 Mon Sep 17 00:00:00 2001 From: tikikun Date: Fri, 1 Dec 2023 17:35:26 +0700 Subject: [PATCH 07/13] add prompt template --- docs/docs/specs/engineering/engine.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/docs/specs/engineering/engine.md b/docs/docs/specs/engineering/engine.md index a416bc820..a70bec840 100644 --- a/docs/docs/specs/engineering/engine.md +++ b/docs/docs/specs/engineering/engine.md @@ -48,6 +48,7 @@ Here is an example of an engine file for `engine_id` `nitro`: "embedding": false, "n_parallel": 1, "cont_batching": false + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant" } ``` From a4c394dfa81d350c8b9d6fca73c0694968594f2e Mon Sep 17 00:00:00 2001 From: tikikun Date: Mon, 4 Dec 2023 10:49:54 +0700 Subject: [PATCH 08/13] add engine_id field --- docs/docs/specs/engineering/models.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/docs/specs/engineering/models.md b/docs/docs/specs/engineering/models.md index 517b9fefe..1cdc1cd1c 100644 --- a/docs/docs/specs/engineering/models.md +++ b/docs/docs/specs/engineering/models.md @@ -64,6 +64,7 @@ Here's a standard example `model.json` for a GGUF model. "description": null, // Defaults to null "state": enum[null, "downloading", "ready", "starting", "stopping", ...] 
"format": "ggufv3", // Defaults to "ggufv3" +"egine_id": "nitro", // engine_id specified in jan/engine folder "engine_parameters": { // Engine parameters inside model.json can override "ctx_len": 2048, // the value inside the base engine.json "ngl": 100, From b556e421d199c84d285eabed1b9697644db2f401 Mon Sep 17 00:00:00 2001 From: tikikun Date: Mon, 4 Dec 2023 10:51:24 +0700 Subject: [PATCH 09/13] add engine_id field --- docs/docs/specs/engineering/models.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/specs/engineering/models.md b/docs/docs/specs/engineering/models.md index 1cdc1cd1c..6f53812c4 100644 --- a/docs/docs/specs/engineering/models.md +++ b/docs/docs/specs/engineering/models.md @@ -64,7 +64,7 @@ Here's a standard example `model.json` for a GGUF model. "description": null, // Defaults to null "state": enum[null, "downloading", "ready", "starting", "stopping", ...] "format": "ggufv3", // Defaults to "ggufv3" -"egine_id": "nitro", // engine_id specified in jan/engine folder +"egine": "nitro", // engine_id specified in jan/engine folder "engine_parameters": { // Engine parameters inside model.json can override "ctx_len": 2048, // the value inside the base engine.json "ngl": 100, From 34b251d6894e0abc8080536572589b9b10d84cfc Mon Sep 17 00:00:00 2001 From: tikikun Date: Mon, 4 Dec 2023 13:21:54 +0700 Subject: [PATCH 10/13] fix mistake putting model into engine --- docs/docs/specs/engineering/engine.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/docs/specs/engineering/engine.md b/docs/docs/specs/engineering/engine.md index a70bec840..d25fdfc04 100644 --- a/docs/docs/specs/engineering/engine.md +++ b/docs/docs/specs/engineering/engine.md @@ -42,7 +42,6 @@ Here is an example of an engine file for `engine_id` `nitro`: ```js { - "llama_model_path": "/model/llama-2-7b-model.gguf", "ctx_len": 512, "ngl": 100, "embedding": false, From efe3b2328aaf99058e212d2e82a00186007a4d02 Mon Sep 17 00:00:00 2001 From: tikikun Date: Mon, 4 
Dec 2023 15:21:39 +0700 Subject: [PATCH 11/13] only two values now --- docs/docs/specs/engineering/models.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/specs/engineering/models.md b/docs/docs/specs/engineering/models.md index 6f53812c4..b3f560d10 100644 --- a/docs/docs/specs/engineering/models.md +++ b/docs/docs/specs/engineering/models.md @@ -62,7 +62,7 @@ Here's a standard example `model.json` for a GGUF model. "version": "1", // Defaults to 1 "created": 1231231, // Defaults to file creation time "description": null, // Defaults to null -"state": enum[null, "downloading", "ready", "starting", "stopping", ...] +"state": enum[null, ready"] "format": "ggufv3", // Defaults to "ggufv3" "egine": "nitro", // engine_id specified in jan/engine folder "engine_parameters": { // Engine parameters inside model.json can override From a016b73c3abe2e1880be3394ee9b5d2b1425ae00 Mon Sep 17 00:00:00 2001 From: hiento09 <136591877+hiento09@users.noreply.github.com> Date: Mon, 4 Dec 2023 15:30:35 +0700 Subject: [PATCH 12/13] Switch to download nitro .tar.gz file instead of .zip file on windows (#832) Co-authored-by: Hien To --- extensions/inference-extension/download.bat | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extensions/inference-extension/download.bat b/extensions/inference-extension/download.bat index 3dfe34218..5e61c5fcf 100644 --- a/extensions/inference-extension/download.bat +++ b/extensions/inference-extension/download.bat @@ -1,4 +1,4 @@ @echo off set /p NITRO_VERSION=<./nitro/version.txt -.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda.zip -e --strip 1 -o ./nitro/win-cuda -.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.zip -e --strip 1 -o ./nitro/win-cpu +.\node_modules\.bin\download 
https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda.tar.gz -e --strip 1 -o ./nitro/win-cuda +.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./nitro/win-cpu From fd0f64c899939a18315d84ad9ca48aa94c368bef Mon Sep 17 00:00:00 2001 From: tikikun Date: Mon, 4 Dec 2023 16:22:29 +0700 Subject: [PATCH 13/13] fix typo --- docs/docs/specs/engineering/models.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/docs/specs/engineering/models.md b/docs/docs/specs/engineering/models.md index b3f560d10..2aed95258 100644 --- a/docs/docs/specs/engineering/models.md +++ b/docs/docs/specs/engineering/models.md @@ -62,9 +62,9 @@ Here's a standard example `model.json` for a GGUF model. "version": "1", // Defaults to 1 "created": 1231231, // Defaults to file creation time "description": null, // Defaults to null -"state": enum[null, ready"] +"state": enum[null, "ready"] "format": "ggufv3", // Defaults to "ggufv3" -"egine": "nitro", // engine_id specified in jan/engine folder +"engine": "nitro", // engine_id specified in jan/engines folder "engine_parameters": { // Engine parameters inside model.json can override "ctx_len": 2048, // the value inside the base engine.json "ngl": 100,