* fix: move tensorrt executable to engine (#2400) * fix: move tensorrt executable to engine Signed-off-by: James <james@jan.ai> * some update Signed-off-by: hiro <hiro@jan.ai> * chore: bump tensorrt version * fix: wrong destroy path * fix: install extensions in parallel * chore: update path for tensorrt engine (#2404) Signed-off-by: James <james@jan.ai> Co-authored-by: James <james@jan.ai> --------- Signed-off-by: James <james@jan.ai> Signed-off-by: hiro <hiro@jan.ai> Co-authored-by: James <james@jan.ai> Co-authored-by: hiro <hiro@jan.ai> Co-authored-by: Louis <louis@jan.ai> * Release/v0.4.9 (#2421) * fix: turn off experimental settings should also turn off quick ask (#2411) * fix: app glitches 1s generating response before starting model (#2412) * fix: disable experimental feature should also disable vulkan (#2414) * fix: model load stuck on windows when can't get CPU core count (#2413) Signed-off-by: James <james@jan.ai> Co-authored-by: James <james@jan.ai> * feat: TensorRT-LLM engine update support (#2415) * fix: engine update * chore: add remove prepopulated models Signed-off-by: James <james@jan.ai> * update tinyjensen url Signed-off-by: James <james@jan.ai> * update llamacorn Signed-off-by: James <james@jan.ai> * update Mistral 7B Instruct v0.1 int4 Signed-off-by: James <james@jan.ai> * update tensorrt Signed-off-by: James <james@jan.ai> * update Signed-off-by: hiro <hiro@jan.ai> * update Signed-off-by: James <james@jan.ai> * prettier Signed-off-by: James <james@jan.ai> * update mistral config Signed-off-by: James <james@jan.ai> * fix some lint Signed-off-by: James <james@jan.ai> --------- Signed-off-by: James <james@jan.ai> Signed-off-by: hiro <hiro@jan.ai> Co-authored-by: James <james@jan.ai> Co-authored-by: hiro <hiro@jan.ai> * Tensorrt LLM disable turing support (#2418) Co-authored-by: Hien To <tominhhien97@gmail.com> * chore: add prompt template tensorrtllm (#2375) * chore: add prompt template tensorrtllm * Add Prompt template for mistral and correct 
model metadata --------- Co-authored-by: Hien To <tominhhien97@gmail.com> * fix: correct tensorrt mistral model.json (#2419) --------- Signed-off-by: James <james@jan.ai> Signed-off-by: hiro <hiro@jan.ai> Co-authored-by: Louis <louis@jan.ai> Co-authored-by: James <james@jan.ai> Co-authored-by: hiro <hiro@jan.ai> Co-authored-by: hiento09 <136591877+hiento09@users.noreply.github.com> Co-authored-by: Hien To <tominhhien97@gmail.com> --------- Signed-off-by: James <james@jan.ai> Signed-off-by: hiro <hiro@jan.ai> Co-authored-by: NamH <NamNh0122@gmail.com> Co-authored-by: James <james@jan.ai> Co-authored-by: hiro <hiro@jan.ai> Co-authored-by: hiento09 <136591877+hiento09@users.noreply.github.com> Co-authored-by: Hien To <tominhhien97@gmail.com>
157 lines
5.6 KiB
JSON
157 lines
5.6 KiB
JSON
[
|
|
{
|
|
"sources": [
|
|
{
|
|
"filename": "config.json",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/config.json"
|
|
},
|
|
{
|
|
"filename": "mistral_float16_tp1_rank0.engine",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/mistral_float16_tp1_rank0.engine"
|
|
},
|
|
{
|
|
"filename": "tokenizer.model",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer.model"
|
|
},
|
|
{
|
|
"filename": "special_tokens_map.json",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/special_tokens_map.json"
|
|
},
|
|
{
|
|
"filename": "tokenizer.json",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer.json"
|
|
},
|
|
{
|
|
"filename": "tokenizer_config.json",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer_config.json"
|
|
},
|
|
{
|
|
"filename": "model.cache",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/model.cache"
|
|
}
|
|
],
|
|
"id": "llamacorn-1.1b-chat-fp16",
|
|
"object": "model",
|
|
"name": "LlamaCorn 1.1B Chat FP16",
|
|
"version": "1.0",
|
|
"description": "LlamaCorn is a refined version of TinyLlama-1.1B, optimized for conversational quality, running on consumer devices through TensorRT-LLM",
|
|
"format": "TensorRT-LLM",
|
|
"settings": {
|
|
"ctx_len": 2048,
|
|
"text_model": false
|
|
},
|
|
"parameters": {
|
|
"max_tokens": 4096
|
|
},
|
|
"metadata": {
|
|
"author": "LLama",
|
|
"tags": ["TensorRT-LLM", "1B", "Finetuned"],
|
|
"size": 2151000000
|
|
},
|
|
"engine": "nitro-tensorrt-llm"
|
|
},
|
|
{
|
|
"sources": [
|
|
{
|
|
"filename": "config.json",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/config.json"
|
|
},
|
|
{
|
|
"filename": "mistral_float16_tp1_rank0.engine",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/mistral_float16_tp1_rank0.engine"
|
|
},
|
|
{
|
|
"filename": "tokenizer.model",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer.model"
|
|
},
|
|
{
|
|
"filename": "special_tokens_map.json",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/special_tokens_map.json"
|
|
},
|
|
{
|
|
"filename": "tokenizer.json",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer.json"
|
|
},
|
|
{
|
|
"filename": "tokenizer_config.json",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer_config.json"
|
|
},
|
|
{
|
|
"filename": "model.cache",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/model.cache"
|
|
}
|
|
],
|
|
"id": "tinyjensen-1.1b-chat-fp16",
|
|
"object": "model",
|
|
"name": "TinyJensen 1.1B Chat FP16",
|
|
"version": "1.0",
|
|
"description": "Do you want to chat with Jensen Huang? Here you are",
|
|
"format": "TensorRT-LLM",
|
|
"settings": {
|
|
"ctx_len": 2048,
|
|
"text_model": false
|
|
},
|
|
"parameters": {
|
|
"max_tokens": 4096
|
|
},
|
|
"metadata": {
|
|
"author": "LLama",
|
|
"tags": ["TensorRT-LLM", "1B", "Finetuned"],
|
|
"size": 2151000000
|
|
},
|
|
"engine": "nitro-tensorrt-llm"
|
|
},
|
|
{
|
|
"sources": [
|
|
{
|
|
"filename": "config.json",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/config.json"
|
|
},
|
|
{
|
|
"filename": "mistral_float16_tp1_rank0.engine",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/mistral_float16_tp1_rank0.engine"
|
|
},
|
|
{
|
|
"filename": "tokenizer.model",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer.model"
|
|
},
|
|
{
|
|
"filename": "special_tokens_map.json",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/special_tokens_map.json"
|
|
},
|
|
{
|
|
"filename": "tokenizer.json",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer.json"
|
|
},
|
|
{
|
|
"filename": "tokenizer_config.json",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer_config.json"
|
|
},
|
|
{
|
|
"filename": "model.cache",
|
|
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/model.cache"
|
|
}
|
|
],
|
|
"id": "mistral-7b-instruct-int4",
|
|
"object": "model",
|
|
"name": "Mistral 7B Instruct v0.1 INT4",
|
|
"version": "1.0",
|
|
"description": "Mistral 7B Instruct v0.1 INT4",
|
|
"format": "TensorRT-LLM",
|
|
"settings": {
|
|
"ctx_len": 2048,
|
|
"text_model": false,
|
|
"prompt_template": "[INST] {prompt} [/INST]"
|
|
},
|
|
"parameters": {
|
|
"max_tokens": 4096
|
|
},
|
|
"metadata": {
|
|
"author": "MistralAI",
|
|
"tags": ["TensorRT-LLM", "7B", "Finetuned"],
|
|
"size": 3840000000
|
|
},
|
|
"engine": "nitro-tensorrt-llm"
|
|
}
|
|
]
|