diff --git a/docs/docs/guides/engines/llama-cpp.md b/docs/docs/guides/engines/llama-cpp.md deleted file mode 100644 index bc485df6c..000000000 --- a/docs/docs/guides/engines/llama-cpp.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: Llama-CPP Extension -slug: /guides/engines/llama-cpp ---- - -## Overview - -[LlamaCPP](https://github.com/ggerganov/llama.cpp) is the default AI engine downloaded with Jan. It is served through Nitro, a C++ inference server, that handles additional UX and hardware optimizations. - -The source code for Nitro-llama-cpp is [here](https://github.com/janhq/nitro). - -There is no additional setup needed. \ No newline at end of file diff --git a/docs/docs/guides/engines/README.mdx b/docs/docs/guides/providers/README.mdx similarity index 54% rename from docs/docs/guides/engines/README.mdx rename to docs/docs/guides/providers/README.mdx index 3a7cdcc44..aa3bfea1f 100644 --- a/docs/docs/guides/engines/README.mdx +++ b/docs/docs/guides/providers/README.mdx @@ -1,6 +1,6 @@ --- -title: Extensions -slug: /guides/engines +title: Inference Providers +slug: /guides/providers --- import DocCardList from "@theme/DocCardList"; diff --git a/docs/docs/guides/engines/image.png b/docs/docs/guides/providers/image.png similarity index 100% rename from docs/docs/guides/engines/image.png rename to docs/docs/guides/providers/image.png diff --git a/docs/docs/guides/providers/llama-cpp.md b/docs/docs/guides/providers/llama-cpp.md new file mode 100644 index 000000000..3a21e80a7 --- /dev/null +++ b/docs/docs/guides/providers/llama-cpp.md @@ -0,0 +1,10 @@ +--- +title: llama.cpp +slug: /guides/providers/llama-cpp +--- + +## Overview + +[Nitro](https://github.com/janhq/nitro) is an inference server on top of [llama.cpp](https://github.com/ggerganov/llama.cpp). It adds an OpenAI-compatible API, a queue, and scaling. + +Nitro is the default AI engine downloaded with Jan. There is no additional setup needed. 
\ No newline at end of file diff --git a/docs/docs/guides/engines/tensorrt-llm.md b/docs/docs/guides/providers/tensorrt-llm.md similarity index 95% rename from docs/docs/guides/engines/tensorrt-llm.md rename to docs/docs/guides/providers/tensorrt-llm.md index 177cc0cf4..4b0edec2a 100644 --- a/docs/docs/guides/engines/tensorrt-llm.md +++ b/docs/docs/guides/providers/tensorrt-llm.md @@ -1,6 +1,6 @@ --- -title: TensorRT-LLM Extension -slug: /guides/engines/tensorrt-llm +title: TensorRT-LLM +slug: /guides/providers/tensorrt-llm --- Users with Nvidia GPUs can get 20-40% faster* token speeds on their laptop or desktops by using [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). diff --git a/docs/sidebars.js b/docs/sidebars.js index 8deafeaa1..b95e4044f 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -201,15 +201,15 @@ const sidebars = { }, { type: "category", - label: "AI Engines", + label: "Inference Providers", className: "head_SubMenu", link: { type: 'doc', - id: "guides/engines/README", + id: "guides/providers/README", }, items: [ - "guides/engines/llama-cpp", - "guides/engines/tensorrt-llm", + "guides/providers/llama-cpp", + "guides/providers/tensorrt-llm", ] }, {