new docs revamped for Jan v0.5.18

Ramon Perez 2025-06-10 00:55:51 +10:00
parent 051d6d3727
commit 96492f6a1a
139 changed files with 722 additions and 4887 deletions

View File

@ -80,7 +80,7 @@ const Hero = () => {
</div>
<p className="text-xl -mt-1 leading-relaxed text-black/60 dark:text-white/60">
Jan is an open source ChatGPT-alternative that runs 100% offline.
Jan is an open source AI Assistant that runs 100% offline.
</p>
</div>
<div className="mb-4 mt-8">

View File

@ -11,16 +11,6 @@
"type": "page",
"title": "Documentation"
},
"cortex": {
"type": "page",
"title": "Cortex",
"display": "hidden"
},
"integrations": {
"type": "page",
"title": "Integrations",
"display": "hidden"
},
"changelog": {
"type": "page",
"title": "Changelog",

Binary file not shown. (Before: 167 KiB)

Binary file not shown. (Before: 827 KiB)

Binary file not shown. (Before: 53 KiB)

Binary file not shown. (Before: 47 KiB)

View File

@ -1,136 +0,0 @@
{
"-- Switcher": {
"type": "separator",
"title": "Switcher"
},
"get-started": {
"title": "GET STARTED",
"type": "separator"
},
"index": {
"title": "Overview",
"href": "/cortex"
},
"quickstart": {
"title": "Quickstart"
},
"hardware": {
"title": "Hardware"
},
"installation": {
"title": "Installation"
},
"basicusage": {
"title": "BASIC USAGE",
"type": "separator"
},
"command-line": {
"title": "CLI"
},
"ts-library": {
"title": "Typescript Library"
},
"py-library": {
"title": "Python Library"
},
"server": {
"title": "Server Endpoint"
},
"capabilities": {
"title": "CAPABILITIES",
"type": "separator"
},
"text-generation": {
"title": "Text Generation"
},
"function-calling": {
"display": "hidden",
"title": "Function Calling"
},
"embeddings": {
"display": "hidden",
"title": "Embeddings"
},
"fine-tuning": {
"display": "hidden",
"title": "Fine-tuning"
},
"vision": {
"display": "hidden",
"title": "Vision"
},
"model-operations": {
"display": "hidden",
"title": "Model Operations"
},
"rag": {
"display": "hidden",
"title": "RAG"
},
"assistant": {
"display": "hidden",
"title": "ASSISTANTS",
"type": "separator"
},
"assistants": {
"display": "hidden",
"title": "Overview"
},
"commandline": {
"title": "COMMAND LINE",
"type": "separator"
},
"cli": {
"title": "cortex"
},
"training-engines": {
"display": "hidden",
"title": "TRAINING ENGINES"
},
"extensions": {
"display": "hidden",
"title": "EXTENSIONS",
"type": "separator"
},
"build-extension": {
"display": "hidden",
"title": "Build an Extension"
},
"architectures": {
"title": "ARCHITECTURE",
"type": "separator"
},
"architecture": {
"title": "Cortex"
},
"cortex-cpp": {
"title": "Cortex.cpp"
},
"cortex-llamacpp": {
"title": "Cortex.llamacpp"
},
"cortex-tensorrt-llm": {
"title": "Cortex.tensorrt-llm",
"display": "hidden"
},
"cortex-python": {
"title": "Cortex.python",
"display": "hidden"
},
"cortex-openvino": {
"title": "Cortex.OpenVino",
"display": "hidden"
},
"ext-architecture": {
"display": "hidden",
"title": "Extensions"
},
"troubleshooting": {
"title": "TROUBLESHOOTING",
"type": "separator"
},
"error-codes": {
"display": "hidden",
"title": "Error Codes"
}
}

View File

@ -1,202 +0,0 @@
---
title: Overview
description: Cortex Architecture
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
## Introduction
Cortex is an alternative to the OpenAI API designed to operate entirely on your local hardware infrastructure. This headless backend platform is also engineered to support TensorRT-LLM, ensuring high-performance machine-learning model execution. It is packaged with a Docker-inspired command-line interface and a Typescript client library.
The following guide details Cortex's core components, providing insights and instructions for those interested in customizing it to meet specific requirements.
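Because Cortex exposes an OpenAI-compatible API, any OpenAI-style client can talk to it once the server is running. The sketch below uses `curl`; the host, port (`1337`), and model ID are illustrative assumptions rather than guaranteed defaults.
```bash
# Sketch: chat completion request to a locally running Cortex server.
# Host, port, and model ID are assumptions -- adjust them to your setup.
curl http://localhost:1337/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "tinyllama",
        "messages": [{"role": "user", "content": "Tell me a joke"}]
      }'
```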
## Architecture
![Architecture](./_assets/architecture.png)
### Main Components
Cortex is architected with several key components, each designed to fulfill specific roles within the system, ensuring efficient processing and response to client requests.
1. **Cortex JS**: This component acts as the interface layer where requests are received and responses are sent.
2. **Server:** The central processing unit of Cortex, this component coordinates all activities across the system. It manages the data flow and ensures operations are correctly executed.
3. **Kernel**: This component checks the server's hardware configuration. Based on the current hardware setup, it determines whether additional dependencies are required, optimizing the system for performance and compatibility.
4. **Runtime**: This process involves dynamically loading necessary libraries and models based on the server's current needs and processing requests.
5. **Dynamic Libraries**: Consists of inference engines loaded on-demand to enhance Cortex's processing power. These engines are essential for performing specialized computational tasks. Currently, Cortex supports:
- Llama.cpp Engine
- TensorRT-LLM Engine
- Python-runtime Engine
### Data Structure
Cortex is equipped with **MySQL** and **SQLite** databases, offering flexible data management options that can be easily adapted to different environments and requirements. It also includes a filesystem store that saves and retrieves data using file-based mechanisms.
#### MySQL
This database is used because it is ideal for Cortex environments where scalability, security, and data integrity are critical. MySQL is well-suited for handling large model-size data from the core extensions.
#### SQLite
This database is used for its simplicity and minimal setup. It handles the smaller model data from the core extensions and any data from the external extensions.
#### File System
Cortex uses a filesystem approach for managing configuration files, such as `model.yaml` files. These files are stored in a structured directory hierarchy, enabling efficient data retrieval and management.
### Providers
#### Internal Provider
Integral to the CLI, it includes the core binary (**`.cpp`**) and is compiled directly with the CLI, giving all parts of the application direct access to core functionalities.
#### Core Extensions
These are bundled with the CLI and include additional functionalities like remote engines and API models, facilitating more complex operations and interactions within the same architectural framework.
#### External Extensions
These are designed to be more flexible and are stored externally. They represent potential future expansions or integrations, allowing the architecture to extend its capabilities without modifying the core system.
### Key Dependencies
Cortex is developed using NestJS and operates via a Node.js server framework, handling all incoming and outgoing requests. It also has a C++ runtime to handle stateless requests.
Below is a detailed overview of its core architecture components:
#### NestJS Framework
The NestJS framework serves as the backbone of Cortex. This framework facilitates the organization of server-side logic into modules, controllers, and extensions, which are important for maintaining a clean codebase and efficient request handling.
#### Node.js Server
Node.js is the primary runtime for Cortex; it handles HTTP requests, executes the server-side logic, and manages responses.
#### C++ Runtime
The C++ runtime is important for managing stateless requests. This component can handle intensive tasks that require optimized performance.
## Code Structure
The repository is organized to separate concerns between domain definitions, business rules, and adapters or implementations.
```
# Entity Definitions
domain/ # This is the core directory where the domains are defined.
abstracts/ # Abstract base classes for common attributes and methods.
models/ # Domain interface definitions, e.g. model, assistant.
repositories/ # Extensions abstract and interface
# Business Rules
usecases/ # Application logic
assistants/ # CRUD logic (invokes dtos, entities).
chat/ # Logic for chat functionalities.
models/ # Logic for model operations.
# Adapters & Implementations
infrastructure/ # Implementations for Cortex interactions
commanders/ # CLI handlers
models/
questions/ # CLI installation UX
shortcuts/ # CLI chained syntax
types/
usecases/ # Invokes UseCases
controllers/ # Nest controllers and HTTP routes
assistants/ # Invokes UseCases
chat/ # Invokes UseCases
models/ # Invokes UseCases
database/ # Database providers (mysql, sqlite)
# Framework specific object definitions
dtos/ # DTO definitions (data transfer & validation)
entities/ # TypeORM entity definitions (db schema)
# Providers
providers/cortex # Cortex [server] provider (a core extension)
repositories/extensions # Extension provider (core & external extensions)
extensions/ # External extensions
command.module.ts # CLI Commands List
main.ts # Entrypoint
```
<Callout type="info">
The structure above promotes clean architecture principles, allowing for scalable and maintainable Cortex development.
</Callout>
## Runtime
```mermaid
sequenceDiagram
User-)Cortex: "Tell me a joke"
Cortex->>HF: Download a model
Cortex->>Model Controller/Service: Start the model
Cortex->>Chat Controller/Service: POST /completions
Chat Controller/Service ->> Chat UseCases: createChatCompletions()
Chat UseCases -->> Model Entity: findOne()
Cortex->>Model Entity: Store the model data
Chat UseCases -->> Extension Repository: findAll()
Extension Repository ->> Cortex Provider: inference()
CortexCPP Server ->> Cortex Provider: Port /???
%% Responses
Cortex Provider ->> Extension Repository: inference()
Extension Repository ->> Chat UseCases: Response stream
Chat UseCases ->> Chat Controller/Service: Formatted response/stream
Chat Controller/Service ->> User: "Your mama"
```
The sequence diagram above outlines the interactions between various components in the Cortex system during runtime, particularly when handling user requests via a CLI. Here's a detailed breakdown of the runtime sequence:
1. **User Request**: The user initiates an interaction by requesting “a joke” via the Cortex CLI.
2. **Model Activation**:
- The API directs the request to the `Model Controller/Service`.
- The service pulls and starts the appropriate model and posts a request to `'/completions'` to prepare the model for processing.
3. **Chat Processing**:
- The `Chat Controller/Service` processes the user's request using `Chat UseCases`.
- The `Chat UseCases` interact with the Model Entity and Extension Repository to gather necessary data and logic.
4. **Data Handling and Response Formation**:
- The `Model Entity` and `Extension Repository` perform data operations, which may involve calling a `Provider` for additional processing.
- Data is fetched, stored, and an inference is performed as needed.
5. **Response Delivery**:
- The response is formatted by the `Chat UseCases` and streamed back to the user through the API.
- The user receives the processed response, completing the cycle of interaction.
## Roadmap
Our development roadmap outlines key features and epics we will focus on in the upcoming releases. These enhancements aim to improve functionality, increase efficiency, and expand Cortex's capabilities.
- **Crash Report Telemetry**: Enhance error reporting and operational stability by automatically collecting and analyzing crash reports.
- **RAG**: Improve response quality and contextual relevance in our AI models.
- **Cortex TensorRT-LLM**: Leverage NVIDIA TensorRT-LLM optimizations for LLMs.
- **Cortex Presets**: Streamline model configurations.
- **Cortex Python Runtime**: Provide a scalable Python execution environment for Cortex.
## Risks & Technical Debt
Cortex CLI, built with Nest-commander, incorporates extensions to integrate various inference providers. This flexibility, however, introduces certain risks related to dependency management and the objective of bundling the CLI into a single executable binary.
### Key Risks
1. **Complex Dependencies**: Utilizing Nest-commander involves a deep dependency tree, risking version conflicts and complicating updates.
2. **Bundling Issues**: Converting to a single executable can reveal issues with `npm` dependencies and relative asset paths, leading to potential runtime errors due to unresolved assets or incompatible binary dependencies.

View File

@ -1,22 +0,0 @@
---
title: Build an Extension
description: Build an Extension
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'

View File

@ -1,54 +0,0 @@
---
title: Command Line Interface
description: Cortex CLI.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# Cortex
Cortex is a CLI tool used to interact with the Jan application and its various functions.
<Callout type="info">
Cortex CLI is compatible with all OpenAI and OpenAI-compatible endpoints.
</Callout>
## Usage
```bash
cortex [command] [flag]
```
### Options
```
-v, --version Cortex version (default: false)
-h, --help display help for command
```
## Sub Commands
- [cortex models](/cortex/cli/models): Manage and configure models.
- [cortex serve](/cortex/cli/serve): Launch an API endpoint server for the Cortex backend.
- [cortex chat](/cortex/cli/chat): Send a chat request to a model.
- [cortex init|setup](/cortex/cli/init): Initialize settings and download dependencies for Cortex.
- [cortex ps](/cortex/cli/ps): Display active models and their operational status.
- [cortex kill](/cortex/cli/kill): Terminate active Cortex processes.
- [cortex pull|download](/cortex/cli/pull): Download a model.
- [cortex run](/cortex/cli/run): Shortcut to start a model and chat **(EXPERIMENTAL)**.
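Putting a few of these subcommands together, a typical session might look like the sketch below; the model ID is illustrative and assumes the model is available on the Jan Model Hub.
```bash
# Sketch of a typical Cortex CLI session; the model ID is illustrative.
cortex init              # initialize settings and download dependencies
cortex pull llama3:7b    # download a model
cortex run llama3:7b     # start the model and open an interactive chat (experimental)
cortex ps                # display active models and their status
cortex kill              # terminate active Cortex processes
```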

View File

@ -1,26 +0,0 @@
{
"init": {
"title": "cortex init"
},
"pull": {
"title": "cortex pull"
},
"run": {
"title": "cortex run"
},
"models": {
"title": "cortex models"
},
"ps": {
"title": "cortex ps"
},
"chat": {
"title": "cortex chat"
},
"kill": {
"title": "cortex kill"
},
"serve": {
"title": "cortex serve"
}
}

View File

@ -1,47 +0,0 @@
---
title: Cortex Chat
description: Cortex chat command.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# `cortex chat`
This command starts a chat session with a specified model, allowing you to interact directly with it through an interactive chat interface.
<Callout type="info">
This command is compatible with all OpenAI and OpenAI-compatible endpoints.
</Callout>
## Usage
```bash
cortex chat --model MODEL_ID
```
### Options
```
-t, --thread <thread_id> Thread Id. If not provided, will create new thread
-m, --message <message> Message to send to the model
-a, --attach Attach to interactive chat session (default: false)
-h, --help display help for command
```
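For example, assuming the flags above can be combined with the usage shown, a one-shot message might look like this sketch (model ID and message are illustrative):
```bash
# Sketch: send a single message to a model; model ID and message are illustrative.
cortex chat --model llama3:7b -m "Summarize your capabilities in one sentence"
```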

View File

@ -1,49 +0,0 @@
---
title: Cortex Models Init
description: Cortex init command.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# `cortex init`
This command initializes Cortex's settings and downloads the required dependencies to run Cortex.
<Callout type="info">
This command is compatible with all OpenAI and OpenAI-compatible endpoints.
</Callout>
## Alias
The following alias is also available for initializing cortex:
- `cortex setup`
## Usage
```bash
cortex init
```
## Options
```
-h, --help display help for command
```

View File

@ -1,45 +0,0 @@
---
title: Cortex Kill
description: Cortex kill command.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# `cortex kill`
This command stops the currently running cortex processes.
<Callout type="info">
This command is compatible with all OpenAI and OpenAI-compatible endpoints.
</Callout>
## Usage
```bash
cortex kill
```
## Options
```
-h, --help display help for command
```

View File

@ -1,52 +0,0 @@
---
title: Cortex Models
description: Cortex CLI.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# `cortex models`
This command allows you to start, stop, and manage various model operations within Cortex.
<Callout type="info">
This command is compatible with all OpenAI and OpenAI-compatible endpoints.
</Callout>
## Usage
```bash
cortex models API_COMMAND [OPTIONS]
# Start a downloaded model
cortex models start MODEL_ID
# Stop a downloaded model
cortex models stop MODEL_ID
```
## Options
```
-h, --help display help for command
```

View File

@ -1,23 +0,0 @@
{
"download": {
"title": "cortex models pull"
},
"list": {
"title": "cortex models list"
},
"get": {
"title": "cortex models get"
},
"update": {
"title": "cortex models update"
},
"start": {
"title": "cortex models start"
},
"stop": {
"title": "cortex models stop"
},
"remove": {
"title": "cortex models remove"
}
}

View File

@ -1,49 +0,0 @@
---
title: Cortex Models Pull
description: Cortex models subcommands.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# `cortex models pull`
This command downloads a model. You can use a Hugging Face `MODEL_ID` to specify which model to download.
<Callout type="info">
This command is compatible with all OpenAI and OpenAI-compatible endpoints.
</Callout>
## Usage
```bash
cortex models pull MODEL_ID
```
## Alias
The following alias is also available for downloading models:
- `cortex models download _`
## Options
```
-m, --model <model_id> Model Id to start chat with
-h, --help display help for command
```

View File

@ -1,45 +0,0 @@
---
title: Cortex Models Get
description: Cortex models subcommands.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# `cortex models get`
This command returns the details of a model defined by a `MODEL_ID`.
<Callout type="info">
This command is compatible with all OpenAI and OpenAI-compatible endpoints.
</Callout>
## Usage
```bash
cortex models get MODEL_ID
```
## Options
```
-h, --help display help for command
```

View File

@ -1,46 +0,0 @@
---
title: Cortex Models List
description: Cortex models subcommands.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# `cortex models list`
This command lists all local models.
<Callout type="info">
This command is compatible with all OpenAI and OpenAI-compatible endpoints.
</Callout>
## Usage
```bash
cortex models list
```
## Options
```
-f, --format <format> Print models list in table or json format (default: "json")
-h, --help display help for command
```

View File

@ -1,45 +0,0 @@
---
title: Cortex Models Remove
description: Cortex models subcommands.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# `cortex models remove`
This command deletes a local model defined by a `MODEL_ID`.
<Callout type="info">
This command is compatible with all OpenAI and OpenAI-compatible endpoints.
</Callout>
## Usage
```bash
cortex models remove MODEL_ID
```
## Options
```
-h, --help display help for command
```

View File

@ -1,46 +0,0 @@
---
title: Cortex Models Start
description: Cortex models subcommands.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# `cortex models start`
This command starts a model defined by a `MODEL_ID`.
<Callout type="info">
This command is compatible with all OpenAI and OpenAI-compatible endpoints.
</Callout>
## Usage
```bash
cortex models start MODEL_ID
```
## Options
```
-a, --attach Attach to interactive chat session (default: false)
-h, --help display help for command
```

View File

@ -1,45 +0,0 @@
---
title: Cortex Models Stop
description: Cortex models subcommands.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# `cortex models stop`
This command stops a model defined by a `MODEL_ID`.
<Callout type="info">
This command is compatible with all OpenAI and OpenAI-compatible endpoints.
</Callout>
## Usage
```bash
cortex models stop MODEL_ID
```
## Options
```
-h, --help display help for command
```

View File

@ -1,48 +0,0 @@
---
title: Cortex Models Update
description: Cortex models subcommands.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# `cortex models update`
This command updates a model configuration defined by a `MODEL_ID`.
<Callout type="info">
This command is compatible with all OpenAI and OpenAI-compatible endpoints.
</Callout>
## Usage
```bash
cortex models update MODEL_ID OPTIONS
```
## Options
```
-m, --model <model_id> Model Id to update
-c, --options <options...> Specify the options to update the model. Syntax: -c option1=value1 option2=value2. For
example: cortex models update -c max_tokens=100 temperature=0.5
-h, --help display help for command
```

View File

@ -1,48 +0,0 @@
---
title: Cortex Ps
description: Cortex ps command.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# `cortex ps`
This command shows the running model and its status.
<Callout type="info">
This command is compatible with all OpenAI and OpenAI-compatible endpoints.
</Callout>
## Usage
```bash
cortex ps
```
For example, it returns the following table:
```bash
┌─────────┬──────────────────────┬───────────────────┬───────────┬──────────┬─────┬──────┐
│ (index) │ modelId │ engine │ status │ duration │ ram │ vram │
├─────────┼──────────────────────┼───────────────────┼───────────┼──────────┼─────┼──────┤
│ 0 │ 'janhq/tinyllama/1b' │ 'cortex.llamacpp' │ 'running' │ '7s' │ '-' │ '-' │
└─────────┴──────────────────────┴───────────────────┴───────────┴──────────┴─────┴──────┘
```

View File

@ -1,82 +0,0 @@
---
title: Cortex Pull
description: Cortex CLI.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# `cortex pull`
This command facilitates downloading machine learning models from various model hubs, including the popular 🤗 [Hugging Face](https://huggingface.co/).
By default, models are downloaded to the `node_modules` library path. For additional information on storage paths and options, refer [here](/cortex/cli#storage).
<Callout type="info">
This command is compatible with all OpenAI and OpenAI-compatible endpoints.
</Callout>
## Alias
The following alias is also available for downloading models:
- `cortex download _`
## Usage
### Preconfigured Models
Preconfigured models (with optimal runtime parameters and templates) are available from the [Jan Model Hub](https://huggingface.co/janhq) on Hugging Face.
Models can be downloaded using a Docker-like interface with the following syntax: `repo_name:branch_name`. Each variant may include different quantizations and sizes, typically organized in the repository's branches.
Available models include [llama3](https://huggingface.co/janhq/llama3), [mistral](https://huggingface.co/janhq/mistral), [tinyllama](https://huggingface.co/janhq/tinyllama), and [many more](https://huggingface.co/janhq).
<Callout type="info">
New models will soon be added to HuggingFace's janhq repository.
</Callout>
```bash
# Pull a specific variant with `repo_name:branch`
cortex pull llama3:7b
```
You can also download `size`, `format`, and `quantization` variants of each model.
```bash
cortex pull llama3:8b-instruct-v3-gguf-Q4_K_M
cortex pull llama3:8b-instruct-v3-tensorrt-llm
```
<Callout type="info">
Model variants are provided via the `branches` in each model's Hugging Face repo.
</Callout>
### Hugging Face Models
You can download any GGUF, TensorRT, or supported-format model directly from Hugging Face.
```bash
# cortex pull org_name/repo_name
cortex pull microsoft/Phi-3-mini-4k-instruct-gguf
```
## Options
```
-h, --help display help for command
```

View File

@ -1,53 +0,0 @@
---
title: Cortex Run
description: Cortex run command
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# `cortex run`
This command starts an interactive chat shell with a specified machine learning model.
<Callout type="info">
This command is compatible with all OpenAI and OpenAI-compatible endpoints.
</Callout>
## Usage
```bash
cortex run MODEL_ID
```
### Options
```
-t, --thread <thread_id> Thread Id. If not provided, will create new thread
-h, --help display help for command
```
## Command Chain
The `cortex run` command is a convenience wrapper that automatically executes a sequence of commands to simplify user interactions, as sketched after this list:
1. [`cortex start`](/cortex/cli/models/start): This command starts the specified model, making it active and ready for interactions.
2. [`cortex chat`](/cortex/cli/chat): Following model activation, this command opens an interactive chat shell where users can directly communicate with the model.
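Under the hood, this chain is roughly equivalent to running the commands yourself; the model ID below is illustrative.
```bash
# Sketch: an approximate manual equivalent of `cortex run llama3:7b`.
# The model ID is illustrative; adjust it to a model you have downloaded.
cortex models start llama3:7b        # start the model
cortex chat --model llama3:7b        # open an interactive chat session with it
```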

View File

@ -1,46 +0,0 @@
---
title: Cortex Models Serve
description: Cortex serve command.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# `cortex serve`
This command runs the API endpoint server for the Cortex back-end.
<Callout type="info">
This command is compatible with all OpenAI and OpenAI-compatible endpoints.
</Callout>
## Usage
```bash
cortex serve
```
## Options
```
-h, --host configure the host for the API endpoint server
-h, --help display help for command
```

View File

@ -1,81 +0,0 @@
---
title: Command Line Interface
description: Cortex CLI.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# Command Line Interface
The Cortex CLI provides a user-friendly platform for managing and operating large language models (LLMs), inspired by tools like Docker and GitHub CLI. Designed for straightforward installation and use, it simplifies the integration and management of LLMs.
<Callout type="info">
The Cortex CLI is OpenAI-compatible.
</Callout>
## Installation
To get started with the Cortex CLI, please see our guides:
- [Quickstart](/cortex/quickstart)
- [Device specific installation](/cortex/installation)
These resources provide detailed instructions to ensure Cortex is set up correctly on your machine, accommodating various hardware environments.
## Usage
The Cortex CLI has a robust command set that streamlines your LLM interactions.
Check out the [CLI reference pages](/cortex/cli) for a comprehensive guide on all available commands and their specific functions.
## Storage
By default, Cortex CLI stores model binaries, thread history, and other usage data in:
`$(npm list -g @janhq/cortex)`.
You can find the respective folders within the `/lib/node_modules/@janhq/cortex/dist/` subdirectory.
<Callout type="info">
**Ongoing Development**:
- Customizable Storage Locations
- Database Integration
</Callout>
## CLI Syntax
The Cortex CLI improves the developer experience by incorporating command chaining and syntactic enhancements.
This approach combines multiple operations into a single command, streamlining complex workflows and simplifying the execution of extensive processes.
### OpenAI API Equivalence
The design of Cortex CLI commands strictly adheres to the method names used in the OpenAI API as a standard practice. This ensures a smooth transition for users familiar with OpenAI's system.
For example:
- The `cortex chat` command is equivalent to the [`POST /v1/chat/completions` endpoint](/cortex/cortex-chat).
- The `cortex models get ID` command is equivalent to the [`GET /models ${ID}` endpoint](/cortex/cortex-models).
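To make the equivalence concrete, the sketch below pairs a CLI command with the HTTP request it mirrors; host, port, and model ID are illustrative assumptions:
```bash
# Sketch: CLI command and the OpenAI-style HTTP request it mirrors.
# Host, port, and model ID are illustrative assumptions.
cortex chat --model llama3:7b -m "Hello"

curl http://localhost:1337/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "llama3:7b", "messages": [{"role": "user", "content": "Hello"}]}'
```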
### Command Chaining
Cortex CLI's command chaining support allows multiple commands to be executed in sequence with a simplified syntax. This approach reduces the complexity of command inputs and speeds up development tasks.
For example:
- The [`cortex run`](/cortex/cortex-run) command, inspired by Docker and GitHub, starts the model and the inference engine, and provides a command-line chat interface for easy testing.

View File

@ -1,77 +0,0 @@
---
title: Cortex.cpp
description: Cortex.cpp Architecture
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# Cortex.cpp
Cortex.cpp is a stateless C++ server that is 100% compatible with the OpenAI API (stateless endpoints).
It includes a Drogon server with request queues, model orchestration logic, hardware telemetry, and more for production environments.
This guide walks you through how Cortex.cpp is designed, the codebase structure, and future plans.
## Usage
See [Quickstart](/cortex/quickstart)
## Interface
## Architecture
## Code Structure
```md
├── app/
│ │ ├── controllers/
│ │ ├── models/
│ │ ├── services/
│ │ ├── ?engines/
│ │ │ ├── llama.cpp
│ │ │ ├── tensorrt-llm
│ │ │ └── ...
│ │ └── ...
│ ├── CMakeLists.txt
│ ├── config.json
│ ├── Dockerfile
│ ├── docker-compose.yml
│ ├── README.md
│ └── ...
```
The `cortex-cpp` folder contains stateless implementations, most of which call into `cortex.llamacpp` and `cortex.tensorrt-llm`, depending on the engine at runtime.
Here you will find the implementations for stateless endpoints:
- `/chat/completion`
- `/audio`
- `/fine_tuning`
- `/embeddings`
- `/load_model`
- `/unload_model`
It also contains core hardware and model management logic, such as CPU instruction set detection and multi-model loading.
## Runtime
## Roadmap

View File

@ -1,143 +0,0 @@
---
title: Cortex.llamacpp
description: Cortex.llamacpp Architecture
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# Cortex.llamacpp
Cortex.llamacpp is a C++ inference library that can be loaded by any server at runtime. It submodules (and occasionally upstreams) [llama.cpp](https://github.com/ggerganov/llama.cpp) for GGUF inference.
In addition to llama.cpp, cortex.llamacpp adds:
- OpenAI compatibility for the stateless endpoints
- Model orchestration like model warm up and concurrent models
<Callout type="info">
Cortex.llamacpp was formerly called "Nitro".
</Callout>
If you already use [Jan](/docs) or [Cortex](/cortex), cortex.llamacpp is bundled by default and you don't need this guide. This guide walks you through how to use cortex.llamacpp as a standalone library in any custom C++ server.
## Usage
To include cortex.llamacpp in your own server implementation, follow this [server example](https://github.com/menloresearch/cortex.llamacpp/tree/main/examples/server).
## Interface
Cortex.llamacpp has the following Interfaces:
- **HandleChatCompletion:** Processes chat completion tasks
```cpp
void HandleChatCompletion(
std::shared_ptr<Json::Value> jsonBody,
std::function<void(Json::Value&&, Json::Value&&)>&& callback);
```
- **HandleEmbedding:** Generates embeddings for the input data provided
```cpp
void HandleEmbedding(
std::shared_ptr<Json::Value> jsonBody,
std::function<void(Json::Value&&, Json::Value&&)>&& callback);
```
- **LoadModel:** Loads a model based on the specifications
```cpp
void LoadModel(
std::shared_ptr<Json::Value> jsonBody,
std::function<void(Json::Value&&, Json::Value&&)>&& callback);
```
- **UnloadModel:** Unloads a model as specified
```cpp
void UnloadModel(
std::shared_ptr<Json::Value> jsonBody,
std::function<void(Json::Value&&, Json::Value&&)>&& callback);
```
- **GetModelStatus:** Retrieves the status of a model
```cpp
void GetModelStatus(
std::shared_ptr<Json::Value> jsonBody,
std::function<void(Json::Value&&, Json::Value&&)>&& callback);
```
**Parameters:**
- **`jsonBody`**: The request content in JSON format.
- **`callback`**: A function that handles the response
## Architecture
The main components include:
- `enginei`: an engine interface definition that extends to all engines, handling endpoint logic and facilitating communication between `cortex.cpp` and `llama engine`.
- `llama engine`: exposes APIs for embedding and inference. It loads and unloads models and simplifies API calls to `llama.cpp`.
- `llama.cpp`: submodule from the `llama.cpp` repository that provides the core functionality for embeddings and inferences.
- `llama server context`: a wrapper that offers a simpler and more user-friendly interface for the `llama.cpp` APIs
![Cortex llamacpp architecture](./_assets/cortex-llamacpp-arch.png)
### Communication Protocols:
- `Streaming`: Responses are processed and returned one token at a time.
- `RESTful`: The response is processed as a whole. After the llama server context completes the entire process, it returns a single result back to cortex.cpp.
![Cortex llamacpp architecture](./_assets/cortex-llamacpp-act.png)
## Code Structure
```
.
├── base # Engine interface definition
| └── cortex-common # Common interfaces used for all engines
| └── enginei.h # Define abstract classes and interface methods for engines
├── examples # Server example to integrate engine
│ └── server.cc # Example server demonstrating engine integration
├── llama.cpp # Upstream llama.cpp repository
│ └── (files from upstream llama.cpp)
├── src # Source implementation for llama.cpp
│ ├── chat_completion_request.h # OpenAI compatible request handling
│ ├── llama_client_slot # Manage vector of slots for parallel processing
│ ├── llama_engine # Implementation of llamacpp engine for model loading and inference
│ ├── llama_server_context # Context management for chat completion requests
│ │ ├── slot # Struct for slot management
│ │ └── llama_context # Struct for llama context management
| | └── chat_completion # Struct for chat completion management
| | └── embedding # Struct for embedding management
├── third-party # Dependencies of the cortex.llamacpp project
│ └── (list of third-party dependencies)
```
## Runtime
## Roadmap
The future plans for Cortex.llamacpp are focused on enhancing performance and expanding capabilities. Key areas of improvement include:
- Performance Enhancements: Optimizing speed and reducing memory usage to ensure efficient processing of tasks.
- Multimodal Model Compatibility: Expanding support to include a variety of multimodal models, enabling a broader range of applications and use cases.
To follow the latest developments, see the [cortex.llamacpp GitHub](https://github.com/menloresearch/cortex.llamacpp)

View File

@ -1,24 +0,0 @@
---
title: Cortex.OpenVino
description: Cortex.OpenVino Architecture
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
# Cortex.OpenVino

View File

@ -1,24 +0,0 @@
---
title: Cortex.python
description: Cortex.python Architecture
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
# Cortex.python

View File

@ -1,24 +0,0 @@
---
title: Cortex.tensorrt-llm
description: Cortex.tensorrt-llm Architecture
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
# Cortex.tensorrt-llm

View File

@ -1,22 +0,0 @@
---
title: Embeddings
description: Embeddings
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'

View File

@ -1,22 +0,0 @@
---
title: Overview
description: Overview.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'

View File

@ -1,22 +0,0 @@
---
title: Error Codes
description: Error Codes.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'

View File

@ -1,22 +0,0 @@
---
title: Extensions Architecture
description: Extensions Architecture
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'

View File

@ -1,22 +0,0 @@
---
title: Fine Tuning
description: Fine Tuning
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'

View File

@ -1,22 +0,0 @@
---
title: Overview
description: Overview.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'

View File

@ -1,22 +0,0 @@
---
title: Function Calling
description: Function Calling
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'

View File

@ -1,50 +0,0 @@
---
title: Hardware Requirements
description: Get started quickly with Jan, a ChatGPT-alternative that runs on your own computer, with a local API server. Learn how to install Jan and select an AI model to start chatting.
sidebar_position: 2
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
quickstart,
getting started,
using AI model,
installation,
]
---
import { Tabs } from 'nextra/components'
import { Callout, Steps } from 'nextra/components'
# Hardware Requirements
To run LLMs on device, Cortex has the following hardware requirements:
<Callout type="info">
These are the general hardware requirements for running Cortex on your system. Please refer to the respective [installation](/cortex/installation) sections for detailed specifications tailored to each environment.
</Callout>
## OS
- MacOSX 13.6 or higher.
- Windows 10 or higher.
- Ubuntu 12.04 and later.
## RAM (CPU Mode)
- 8GB for running up to 3B models.
- 16GB for running up to 7B models.
- 32GB for running up to 13B models.
## VRAM (GPU Mode)
- 6GB can load the 3B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU.
- 8GB can load the 7B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU.
- 12GB can load the 13B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU.
## Disk Space
- 10GB: The app is 1.02 MB, but models are usually 4GB+

View File

@ -1,50 +0,0 @@
---
title: Cortex
description: Cortex is a local LLM engine for developers
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Discord integration,
Discord,
bot,
]
---
import { Callout, Steps } from 'nextra/components'
# Cortex
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
![Cortex Cover Image](./_assets/cortex-cover.png)
Cortex is an [OpenAI-compatible](https://platform.openai.com/docs/introduction) local AI server that developers can use to build LLM apps. It can be used as a standalone server, or imported as a library.
Cortex currently supports two inference engines:
- Llama.cpp
- TensorRT-LLM
<Callout>
**Real-world Use**: Cortex powers [Jan](/docs), our local ChatGPT-alternative.
Cortex has been battle-tested through 900k downloads, and handles a variety of hardware and software edge cases.
</Callout>
### Roadmap
Cortex's roadmap is to implement an [OpenAI-equivalent API](https://platform.openai.com/docs/api-reference) using a fully open source stack. Our goal is to make switching to open source AI as easy as possible for developers.
### Architecture
Cortex's [architecture](/cortex/architecture) features a C++ inference core, with [higher-order features](/cortex/architecture) handled in TypeScript.
Our [long-term direction](/cortex/roadmap) is to (eventually) move towards being a full C++ library to enable embedded and robotics use cases.

View File

@ -1,37 +0,0 @@
---
title: Desktop Installation
description: Cortex Desktop Installation.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
import childPages from './installation/_meta.json';
# Cortex Desktop Installation
<br/>
<Cards
children={Object.keys(childPages).map((key, i) => (
<Card
key={i}
title={childPages[key].title}
href={childPages[key].href}
/>
))}
/>

View File

@ -1,14 +0,0 @@
{
"mac": {
"title": "Mac",
"href": "/cortex/installation/mac"
},
"windows": {
"title": "Windows",
"href": "/cortex/installation/windows"
},
"linux": {
"title": "Linux",
"href": "/cortex/installation/linux"
}
}

View File

@ -1,181 +0,0 @@
---
title: Linux
description: Install Cortex CLI on Linux.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
quickstart,
getting started,
using AI model,
installation,
"desktop"
]
---
import { Tabs, Steps } from 'nextra/components'
import { Callout } from 'nextra/components'
import FAQBox from '@/components/FaqBox'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# Linux Installation
## Prerequisites
### Dependencies
Before installation, ensure that you have installed the following:
- **Node.js**: Required for running the installation.
- **NPM**: Needed to manage packages.
<Callout type="info">
The **CPU instruction sets** are not required for the initial installation of Cortex. These dependencies will be automatically installed during Cortex initialization if they are not already on your system.
</Callout>
### Hardware
Ensure that your system meets the following requirements to run Cortex:
<Tabs items={['OS', 'CPU', 'RAM', 'GPU', 'Disk']}>
<Tabs.Tab>
- Debian-based (Supports `.deb` and `AppImage` )
- Ubuntu-based
- Ubuntu Desktop LTS (official)/ Ubuntu Server LTS (only for server)
- Edubuntu (Mainly desktop)
- Kubuntu (Desktop only)
- Lubuntu (Both desktop and server, though mainly desktop)
- Ubuntu Budgie (Mainly desktop)
- Ubuntu Cinnamon (Desktop only)
- Ubuntu Kylin (Both desktop and server)
- Ubuntu MATE (Desktop only)
- Pacman-based
- Arch Linux based
- Arch Linux (Mainly desktop)
- SteamOS (Desktop only)
- RPM-based (Supports `.rpm` and `AppImage` )
- Fedora-based
- RHEL-based (Server only)
- openSUSE (Both desktop and server)
<Callout type="info">
- Please check whether your Linux distribution supports desktop, server, or both environments.
</Callout>
</Tabs.Tab>
<Tabs.Tab>
<Tabs items={['Intel', 'AMD']}>
<Tabs.Tab>
<Callout type="info">
- Jan supports a processor that can handle AVX2. For the full list, please see [here](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2).
- We support older processors with AVX and AVX-512, though this is not recommended.
</Callout>
- Haswell processors (Q2 2013) and newer.
- Tiger Lake (Q3 2020) and newer for Celeron and Pentium processors.
</Tabs.Tab>
<Tabs.Tab>
<Callout type="info">
- Jan supports a processor that can handle AVX2. For the full list, please see [here](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2).
- We support older processors with AVX and AVX-512, though this is not recommended.
</Callout>
- Excavator processors (Q2 2015) and newer.
</Tabs.Tab>
</Tabs>
</Tabs.Tab>
<Tabs.Tab>
- 8GB for running up to 3B models (int4).
- 16GB for running up to 7B models (int4).
- 32GB for running up to 13B models (int4).
<Callout type="info">
We support DDR2 RAM as the minimum requirement but recommend using newer generations of RAM for improved performance.
</Callout>
</Tabs.Tab>
<Tabs.Tab>
- 6GB can load the 3B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU.
- 8GB can load the 7B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU.
- 12GB can load the 13B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU.
<Callout type="info">
Having at least 6GB VRAM when using NVIDIA, AMD, or Intel Arc GPUs is recommended.
</Callout>
</Tabs.Tab>
<Tabs.Tab>
- At least 10GB for app storage and model download.
</Tabs.Tab>
</Tabs>
## Cortex Installation
To install Cortex, follow the steps below:
<Steps>
### Step 1: Install Cortex
Run the following command to install Cortex globally on your machine:
<Callout type="info">
Install NPM on your machine before proceeding with this step.
</Callout>
```sh
# Install globally on your system
npm i -g @janhq/cortex
```
<Callout type="info">
Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance.
</Callout>
### Step 2: Verify the Installation
1. After installation, you can verify that Cortex is installed correctly by getting help information.
```sh
# Get the help information
cortex -h
```
2. Cortex is ready to use!
</Steps>
## Build from Source
To install Cortex from the source, follow the steps below:
1. Clone the Cortex repository [here](https://github.com/menloresearch/cortex/tree/dev).
2. Navigate to the `cortex-js` folder.
3. Open the terminal and run the following command to build the Cortex project:
```sh
npx nest build
```
4. Make the `command.js` executable:
```sh
chmod +x '[path-to]/cortex/cortex-js/dist/src/command.js'
```
5. Link the package globally:
```sh
npm link
```
6. Initialize Cortex by following the steps [here](#step-3-initialize-cortex).
## Uninstall Cortex
Run the following command to uninstall Cortex globally on your machine:
```sh
# Uninstall globally on your system
npm uninstall -g @janhq/cortex
```

View File

@ -1,147 +0,0 @@
---
title: Mac
description: Install Cortex CLI on Mac.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
quickstart,
getting started,
using AI model,
installation,
"desktop"
]
---
import { Tabs, Steps } from 'nextra/components'
import { Callout } from 'nextra/components'
import FAQBox from '@/components/FaqBox'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# Mac Installation
## Prerequisites
### Dependencies
Before installation, ensure that you have installed the following:
- **Node.js**: Required for running the installation.
- **NPM**: Needed to manage packages.
<Callout type="info">
The **CPU instruction sets** are not required for the initial installation of Cortex. These dependencies will be automatically installed during Cortex initialization if they are not already on your system.
</Callout>
### Hardware
Ensure that your system meets the following requirements to run Cortex:
<Tabs items={['Mac Intel CPU', 'Mac Apple Silicon']}>
<Tabs.Tab>
<Tabs items={['Operating System', 'Memory', 'Disk']}>
<Tabs.Tab>
- MacOSX 13.6 or higher.
</Tabs.Tab>
<Tabs.Tab>
- 8GB for running up to 3B models.
- 16GB for running up to 7B models.
- 32GB for running up to 13B models.
</Tabs.Tab>
<Tabs.Tab>
- At least 10GB for app and model download.
</Tabs.Tab>
</Tabs>
</Tabs.Tab>
<Tabs.Tab>
<Tabs items={['Operating System', 'Memory', 'Disk']}>
<Tabs.Tab>
- MacOSX 13.6 or higher.
</Tabs.Tab>
<Tabs.Tab>
- 8GB for running up to 3B models.
- 16GB for running up to 7B models.
- 32GB for running up to 13B models.
<Callout type="info">
Apple Silicon Macs leverage Metal for GPU acceleration, providing faster performance than Intel Macs, which rely solely on CPU processing.
</Callout>
</Tabs.Tab>
<Tabs.Tab>
- At least 10GB for app and model download.
</Tabs.Tab>
</Tabs>
</Tabs.Tab>
</Tabs>
## Cortex Installation
To install Cortex, follow the steps below:
<Steps>
### Step 1: Install Cortex
Run the following command to install Cortex globally on your machine:
<Callout type="info">
Install NPM on your machine before proceeding with this step.
</Callout>
```sh
# Install globally on your system
npm i -g @janhq/cortex
```
<Callout type="info">
Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance.
</Callout>
### Step 2: Verify the Installation
1. After installation, you can verify that Cortex is installed correctly by getting help information.
```sh
# Get the help information
cortex -h
```
2. Cortex is ready to use!
</Steps>
## Build from Source
To install Cortex from the source, follow the steps below:
1. Clone the Cortex repository [here](https://github.com/menloresearch/cortex/tree/dev).
2. Navigate to the `cortex-js` folder.
3. Open the terminal and run the following command to build the Cortex project:
```sh
npx nest build
```
4. Make the `command.js` executable:
```sh
chmod +x '[path-to]/cortex/cortex-js/dist/src/command.js'
```
5. Link the package globally:
```sh
npm link
```
6. Initialize Cortex by following the steps [here](#step-3-initialize-cortex).
## Uninstall Cortex
Run the following command to uninstall Cortex globally on your machine:
```sh
# Uninstall globally using NPM
npm uninstall -g @janhq/cortex
```

View File

@ -1,198 +0,0 @@
---
title: Windows
description: Install Cortex CLI on Windows.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
quickstart,
getting started,
using AI model,
installation,
"desktop"
]
---
import { Tabs, Steps } from 'nextra/components'
import { Callout } from 'nextra/components'
import FAQBox from '@/components/FaqBox'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# Windows Installation
## Prerequisites
### Dependencies
Before installation, ensure that you have installed the following:
- **Node.js**: Required for running the installation.
- **NPM**: Needed to manage packages.
- **Windows Subsystem for Linux (Ubuntu)**: Required for WSL2 installation.
<Callout type="info">
The **CPU instruction sets** are not required for the initial installation of Cortex. These dependencies will be automatically installed during Cortex initialization if they are not already on your system.
</Callout>
### Hardware
Ensure that your system meets the following requirements to run Cortex:
<Tabs items={['OS', 'CPU', 'RAM', 'GPU', 'Disk']}>
<Tabs.Tab>
- Windows 10 or higher.
</Tabs.Tab>
<Tabs.Tab>
<Tabs items={['Intel', 'AMD']}>
<Tabs.Tab>
<Callout type="info">
- Jan supports a processor that can handle AVX2. For the full list, please see [here](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2).
- We support older processors with AVX and AVX-512, though this is not recommended.
</Callout>
- Haswell processors (Q2 2013) and newer.
- Tiger Lake (Q3 2020) and newer for Celeron and Pentium processors.
</Tabs.Tab>
<Tabs.Tab>
<Callout type="info">
- Jan supports a processor that can handle AVX2. For the full list, please see [here](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2).
- We support older processors with AVX and AVX-512, though this is not recommended.
</Callout>
- Excavator processors (Q2 2015) and newer.
</Tabs.Tab>
</Tabs>
</Tabs.Tab>
<Tabs.Tab>
- 8GB for running up to 3B models (int4).
- 16GB for running up to 7B models (int4).
- 32GB for running up to 13B models (int4).
<Callout type="info">
We support DDR2 RAM as the minimum requirement but recommend using newer generations of RAM for improved performance.
</Callout>
</Tabs.Tab>
<Tabs.Tab>
  - 6GB can load the 3B model (int4) with `ngl` at 120 ~ full speed on CPU/GPU.
  - 8GB can load the 7B model (int4) with `ngl` at 120 ~ full speed on CPU/GPU.
  - 12GB can load the 13B model (int4) with `ngl` at 120 ~ full speed on CPU/GPU.
<Callout type="info">
Having at least 6GB VRAM when using NVIDIA, AMD, or Intel Arc GPUs is recommended.
</Callout>
</Tabs.Tab>
<Tabs.Tab>
- At least 10GB for app storage and model download.
</Tabs.Tab>
</Tabs>
## Cortex Installation
To install Cortex, follow the steps below:
<Steps>
### Step 1: Install Cortex
Run the following command to install Cortex globally on your machine:
<Callout type="info">
Install NPM on your machine before proceeding with this step.
</Callout>
```sh
# Install globally on your system
npm i -g @janhq/cortex
```
<Callout type="info">
Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance.
</Callout>
### Step 2: Verify the Installation
1. After installation, you can verify that Cortex is installed correctly by getting help information.
```sh
# Get the help information
cortex -h
```
2. Cortex is ready to use!
</Steps>
## Windows Subsystem for Linux
To install Cortex using the NPM package in WSL2, follow the steps below:
<Steps>
### Step 1: Open your WSL2 Terminal
Open your WSL2 terminal using your installed Linux distribution, such as Ubuntu.
### Step 2: Install Cortex
Run the following command to install Cortex globally on your machine:
<Callout type="info">
Install NPM on your machine before proceeding with this step.
</Callout>
```sh
# Install globally on your system
npm i -g @janhq/cortex
```
<Callout type="info">
Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance.
</Callout>
### Step 3: Verify the Installation
After installation, you can verify that Cortex is installed correctly by getting help information.
```sh
# Get the help information
cortex -h
```
</Steps>
## Build from Source
To install Cortex from the source, follow the steps below:
1. Clone the Cortex repository [here](https://github.com/menloresearch/cortex/tree/dev).
2. Navigate to the `cortex-js` folder.
3. Open the terminal and run the following command to build the Cortex project:
```sh
npx nest build
```
4. Run the `command.js` file with Node to verify the build:
```sh
node "[path-to]\cortex\cortex-js\dist\src\command.js"
```
5. Link the package globally:
```sh
npm link
```
6. Initialize Cortex by following the steps [here](#step-3-initialize-cortex).
## Uninstall Cortex
Run the following command to uninstall Cortex globally on your machine:
```sh
# Uninstall globally on your system
npm uninstall -g @janhq/cortex
```

View File

@ -1,22 +0,0 @@
---
title: Model Operations
description: Model Operations
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'

View File

@ -1,22 +0,0 @@
---
title: Overview
description: Overview.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'

View File

@ -1,69 +0,0 @@
---
title: Python Library
description: Cortex Python Library.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# Python Library
Cortex also provides a Python client library that is a **direct substitute for OpenAI's** [Python library](https://github.com/openai/openai-python), enabling easy integration and streamlined workflows.
## Installation
Use the following pip command to install the Cortex library in your project:
```sh
pip install @janhq/cortex-python
```
## Usage
Switching to the Cortex Client Library from the OpenAI Python Library involves simple updates.
1. Replace the OpenAI import with Cortex in your application:
```diff
- from openai import OpenAI
+ from @janhq/cortex-python import Cortex
```
2. Modify the initialization of the client to use Cortex:
```diff
- client = OpenAI(api_key='your-api-key')
+ client = Cortex(base_url="BASE_URL", api_key="API_KEY") # This can be omitted if using the default
```
### Example Usage
```py
from @janhq/cortex-python import Cortex
client = Cortex(base_url="http://localhost:1337", api_key="cortex")
model = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
client.models.start(model=model)
completion = client.chat.completions.create(
model=model,
messages=[
{
"role": "user",
"content": "Say this is a test",
},
],
)
print(completion.choices[0].message.content)
```

View File

@ -1,55 +0,0 @@
---
title: Quickstart
description: Cortex Quickstart.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
# Quickstart
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
To get started, confirm that your system meets the [hardware requirements](/cortex/hardware), and follow the steps below:
```bash
# 1. Install Cortex using NPM
npm i -g @janhq/cortex
# 2. Download a GGUF model
cortex models pull llama3
# 3. Run the model to start chatting
cortex models run llama3
# 4. (Optional) Run Cortex in OpenAI-compatible server mode
cortex serve
```
<Callout type="info">
For more details regarding the Cortex server mode, please see here:
- [Server Endpoint](/cortex/server)
- [`cortex serve` command](/cortex/cli/serve)
</Callout>
## What's Next?
With Cortex now fully operational, you're ready to delve deeper:
- Explore how to [install Cortex](/cortex/installation) across various hardware environments.
- Familiarize yourself with the comprehensive set of [Cortex CLI commands](/cortex/cli) available for use.
- Gain insights into the system's design by examining the [architecture](/cortex/architecture) of Cortex.

View File

@ -1,22 +0,0 @@
---
title: RAG
description: RAG
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'

View File

@ -1,22 +0,0 @@
---
title: Overview
description: Overview.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'

View File

@ -1,47 +0,0 @@
---
title: Server Endpoint
description: Cortex server endpoint.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps, Cards, Card } from 'nextra/components'
import OAICoverage from "@/components/OAICoverage"
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# Server Endpoint
Cortex can run in headless server mode, providing an [OpenAI-API compatible](https://platform.openai.com/docs/api-reference/introduction) endpoint.
## Usage
```
cortex serve
```
A full, local AI server will be started on port `1337` (customizable).
## Playground
You can open up an interactive playground at: http://localhost:1337/api, generated from Swagger.
## OpenAI Coverage
<OAICoverage endDate='06-21-2024' />

View File

@ -1,86 +0,0 @@
---
title: Overview
description: Overview.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
import { Tabs } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# Text Generation
Cortex's Chat API is compatible with OpenAI's [Chat Completions](https://platform.openai.com/docs/api-reference/chat) endpoint and serves as a drop-in replacement for local inference.
For local inference, Cortex is [multi-engine](#multiple-local-engines) and supports the following model formats:
- `GGUF`: A generalizable LLM format that runs across CPUs and GPUs. Cortex implements a GGUF runtime through [llama.cpp](https://github.com/ggerganov/llama.cpp/).
- `TensorRT`: A production-ready, enterprise-grade LLM format optimized for fast inference on NVIDIA GPUs. Cortex implements a TensorRT runtime through [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM).
For remote inference, Cortex routes requests to multiple APIs while providing a single, easy-to-use, OpenAI-compatible endpoint. [Read more](#remote-api-integration).
## Usage
<Tabs items={['CLI', 'Javascript', 'CURL']}>
<Tabs.Tab>
```bash
# Streaming
cortex chat --model janhq/TinyLlama-1.1B-Chat-v1.0-GGUF
```
</Tabs.Tab>
</Tabs>
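For reference, once a model is running you can make the same request over plain HTTP. The following is a minimal sketch, assuming the default port `1337` and the OpenAI-style `/v1/chat/completions` path; adjust the model ID to whatever you pulled:
```bash
# Streaming chat completion against the local Cortex server
curl http://localhost:1337/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "janhq/TinyLlama-1.1B-Chat-v1.0-GGUF",
        "messages": [{"role": "user", "content": "Hello!"}],
        "stream": true
      }'
```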
**Read more:**
- Chat Completion Object
- Chat Completions API
- Chat Completions CLI
## Capabilities
### Multiple Local Engines
Cortex scales applications from prototype to production. It runs on CPU-only laptops with Llama.cpp and GPU-accelerated clusters with TensorRT-LLM.
To learn more about how to configure each engine:
- Use llama.cpp
- Use tensorrt-llm
To learn more about our engine architecture:
- cortex.cpp
- cortex.llamacpp
- cortex.tensorRTLLM
### Multiple Remote APIs
Cortex also works as an aggregator to make remote inference requests from a single endpoint.
Currently, Cortex supports:
- OpenAI
- Groq
- Cohere
- Anthropic
- MistralAI
- Martian
- OpenRouter

View File

@ -1,66 +0,0 @@
---
title: Typescript Library
description: Cortex Node Client Library
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'
<Callout type="warning">
🚧 Cortex is under construction.
</Callout>
# Typescript Library
Cortex provides a robust Typescript client library designed as a **direct substitute for OpenAI's** [Node.js/Typescript library](https://github.com/openai/openai-node), enabling easy integration and streamlined workflows.
## Installation
Install the package via npm with the following command in your project:
```sh
npm install @janhq/cortex-node
```
## Usage
Transitioning to the Cortex Client Library from the OpenAI Client Library involves minimal changes, mostly updating the import statement.
1. Replace the OpenAI import with Cortex in your application:
```diff
- import OpenAI from 'openai';
+ import { Cortex } from '@janhq/cortex-node';
```
2. Modify the initialization of the client to use Cortex:
```diff
- const openai = new OpenAI({
+ const cortex = new Cortex({
  baseURL: 'BASE_URL', // The default base URL for Cortex is 'http://localhost:1337'
apiKey: process.env['OPENAI_API_KEY'], // This can be omitted if using the default
});
```
### Example Usage
```js
import { Cortex } from '@janhq/cortex-node';
const cortex = new Cortex({
  baseURL: 'http://localhost:1337',
apiKey: process.env['cortex'],
});
cortex.models.start('llama3:7b')
cortex.models.stop('llama3:7b')
cortex.threads.list()
```

View File

@ -1,22 +0,0 @@
---
title: Vision
description: Vision
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'

View File

@ -1,22 +0,0 @@
---
title: Overview
description: Overview.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Cortex,
Jan,
LLMs
]
---
import { Callout, Steps } from 'nextra/components'
import { Cards, Card } from 'nextra/components'

Binary file not shown.

After

Width:  |  Height:  |  Size: 163 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 237 KiB

After

Width:  |  Height:  |  Size: 598 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 146 KiB

After

Width:  |  Height:  |  Size: 524 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 152 KiB

After

Width:  |  Height:  |  Size: 541 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 257 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 162 KiB

After

Width:  |  Height:  |  Size: 537 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 487 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 327 KiB

After

Width:  |  Height:  |  Size: 199 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 149 KiB

After

Width:  |  Height:  |  Size: 632 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 757 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 137 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 496 KiB

After

Width:  |  Height:  |  Size: 681 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 265 KiB

After

Width:  |  Height:  |  Size: 405 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 172 KiB

After

Width:  |  Height:  |  Size: 158 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 164 KiB

After

Width:  |  Height:  |  Size: 745 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 174 KiB

After

Width:  |  Height:  |  Size: 999 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 152 KiB

After

Width:  |  Height:  |  Size: 648 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 152 KiB

After

Width:  |  Height:  |  Size: 490 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 186 KiB

After

Width:  |  Height:  |  Size: 685 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 151 KiB

After

Width:  |  Height:  |  Size: 775 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 173 KiB

After

Width:  |  Height:  |  Size: 669 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 538 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 92 KiB

After

Width:  |  Height:  |  Size: 592 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 158 KiB

After

Width:  |  Height:  |  Size: 679 KiB

View File

@ -1,17 +1,30 @@
{
"-- Switcher": {
"type": "separator",
"title": "Switcher"
},
"get-started": {
"title": "GET STARTED",
"index": "Overview",
"how-to-separator": {
"title": "HOW TO",
"type": "separator"
},
"index": "Overview",
"quickstart": {
"title": "Quickstart"
"installation": "Install 👋 Jan",
"threads": "Start Chatting",
"manage-models": "Manage Models",
"assistants": "Create Assistants",
"tutorials-separators": {
"title": "TUTORIALS",
"type": "separator"
},
"desktop": "Installation",
"quickstart": "Quickstart",
"remote-models": "Connect to Remote Models",
"custom-providers": "Add Custom Providers",
"server-examples": "Provide AI to Tools",
"mcp": "Model Context Protocol",
"explanation-separator": {
"title": "EXPLANATION",
"type": "separator"
},
"llama-cpp": "Local AI Engine",
"api-server": "Server Overview",
"data-folder": "Jan Data Folder",
"privacy": "Privacy",
"privacy-policy": {
@ -19,39 +32,12 @@
"display": "hidden",
"title": "Privacy Policy"
},
"user-guides": {
"title": "BASIC USAGE",
"reference-separator": {
"title": "REFERENCE",
"type": "separator"
},
"models": "Models",
"tools": "Tools",
"assistants": "Assistants",
"threads": "Chats",
"settings": "Settings",
"api-server": "Local API Server",
"inference-engines": {
"title": "ENGINES",
"type": "separator"
},
"local-engines": "Local Engines",
"remote-models": "Remote Engines",
"install-engines": "Install Engines",
"extensions-separator": {
"title": "EXTENSIONS",
"type": "separator"
},
"extensions": "Overview",
"extensions-settings": "Extension Settings",
"configure-extensions": "Configure Extensions",
"install-extensions": "Install Extensions",
"troubleshooting-separator": {
"title": "TROUBLESHOOTING",
"type": "separator"
},
"troubleshooting": "Troubleshooting",
"error-codes": {
"type": "page",
"display": "hidden",
"title": "Error Codes"
}
"model-parameters": "Model Parameters"
}

View File

@ -21,13 +21,13 @@ import { Settings, EllipsisVertical } from 'lucide-react'
# Local API Server
Jan includes a built-in API server that is compatible with OpenAI's API specification, allowing you to interact with AI models through a local HTTP interface. This means you can use Jan as a drop-in replacement for OpenAI's API, but running entirely on your computer.
Jan includes a built-in API server that is compatible with OpenAI's API specification, allowing you to
interact with AI models through a local HTTP interface. This means you can use Jan as a drop-in replacement
for OpenAI's API, but running entirely on your computer.
Jan uses **Cortex** as its core engine for running AI models. If you need a standalone API server without Jan's desktop interface (for example, in server environments or for command-line usage), you can use Cortex directly: https://cortex.so.
<Callout>
Full API documentation is available at [Cortex's API Reference](https://cortex.so/api-reference#tag/chat).
</Callout>
Jan uses **llama.cpp** as its core engine for running AI models. If you need a standalone API server without
Jan's desktop interface (for example, in server environments or for command-line usage), you can use it directly
as well after downloading it from [here](https://github.com/ggml-org/llama.cpp).
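For instance, llama.cpp ships a small server binary that exposes an OpenAI-compatible API on its own. This is only a sketch: the model path and port are placeholders, and the exact flags may vary between llama.cpp releases.
```bash
# Start llama.cpp's bundled server with a local GGUF model on port 8080
llama-server -m ./models/your-model.gguf --port 8080
```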
<br/>
![Local API Server](./_assets/api-server.png)
@ -39,7 +39,7 @@ Full API documentation is available at [Cortex's API Reference](https://cortex.s
### Step 1: Start Server
1. Navigate to the **Local API Server**
2. Configure [Server Settings](/docs/api-server#server-settings)
2. Add an API Key (it can be anything) or fully configure the server at [Server Settings](/docs/api-server#server-settings)
3. Click **Start Server** button
4. Wait for the confirmation message in the logs panel, your server is ready when you see: `JAN API listening at: http://127.0.0.1:1337`
@ -48,21 +48,20 @@ Full API documentation is available at [Cortex's API Reference](https://cortex.s
The easiest way to test your server is through the API Playground:
1. Click the **API Playground** button to open its testing interface
2. Select a model from the dropdown menu in Jan interface
3. Try a simple [chat completion](https://cortex.so/api-reference#tag/chat/post/v1/chat/completions) request
3. Try a simple request
4. View the response in real-time
### Step 3: Use the API
Navigate to [Cortex's API Reference](https://cortex.so/api-reference#tag/chat) to see full API endpoints for your use case.
### Step 3: Use the API
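Any OpenAI-compatible client, or a plain HTTP request, can talk to the server. As a minimal sketch (assuming the default address `127.0.0.1:1337`, the API key you set in Server Settings, and a model ID exactly as shown in Jan):
```bash
curl http://127.0.0.1:1337/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -d '{
        "model": "llama3.2-1b-instruct",
        "messages": [{"role": "user", "content": "What can you do?"}]
      }'
```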
</Steps>
## Server Settings
#### Host Address Options
- **127.0.0.1 (Recommended)**:
- **127.0.0.1 (Recommended)**:
- Only accessible from your computer
- Most secure option for personal use
- **0.0.0.0**:
- **0.0.0.0**:
- Makes server accessible from other devices on your network
- Use with caution and only when necessary
@ -100,24 +99,23 @@ Enable **Verbose Server Logs** for detailed error messages.
</Callout>
**1. Common Issues**
- Confirm the server is running
- Check if model is successfully loaded in Jan
- Check if the port is already in use by another application
- Verify you have admin/sudo rights if needed
- Make sure your API endpoint matches your server settings. Example: Using `http://localhost:1337` when you set a different port.
- Make sure the model name in your API request matches exactly what's shown in Jan. Example: If you selected "Llama 3.2 1B Instruct Q8" in Jan, use `llama3.2-1b-instruct` in your API request.
- Verify your JSON request format is correct
- Verify firewall settings
- Look for detailed error messages in the logs
- Confirm the server is running
- Check if model is successfully loaded in Jan
- Check if the port is already in use by another application
- Verify you have admin/sudo rights if needed
- Make sure your API endpoint matches your server settings. Example: Using `http://localhost:1337` when you set a different port.
- Make sure the model name in your API request matches exactly what's shown in Jan. Example: If you selected "Llama 3.2 1B Instruct Q8" in Jan, use `llama3.2-1b-instruct` in your API request.
- Verify your JSON request format is correct
- Verify firewall settings
- Look for detailed error messages in the logs
**2. CORS Errors in Web Apps**
- Enable CORS in server settings if using from a webpage
- Verify the origin of the request
- Verify your web app's request URL matches the server address exactly
- Check browser console for specific error messages
- Enable CORS in server settings if using from a webpage
- Verify the origin of the request
- Verify your web app's request URL matches the server address exactly
- Check browser console for specific error messages
**3. Performance Issues**
- Monitor system resources (CPU, RAM, and GPU usage)
- Try to reduce the context length or `ngl` (number of GPU layers)
- Check for other resource-intensive applications
- Monitor system resources (CPU, RAM, and GPU usage)
- Try to reduce the context length or `ngl` (number of GPU layers)
- Check for other resource-intensive applications

View File

@ -21,21 +21,23 @@ import { Callout, Steps } from 'nextra/components'
# Assistants
Jan allows you to manage multiple Assistants, each with its own configuration profile that determines how the AI should behave and respond to your inputs. You can add, edit, or delete assistants, and customize their instructions and settings.
Jan lets you give models specific sets of instructions without having to repeat yourself. We call a model
paired with your instructions an Assistant. Each assistant can also have its own configuration, which helps
guide how the AI model behaves and responds to your inputs. You can add, edit, or delete assistants, and
customize their instructions and settings from the Assistants tab.
![Assistants UI Overview](./_assets/assistants-ui-overview.png)
![The Assistants management page, where you can view, add, edit, or delete assistants. Each assistant has a name,
description, and can be customized for different tasks.](./_assets/assistants-ui-overview.png)
*Screenshot: The Assistants management page, where you can view, add, edit, or delete assistants. Each assistant has a name, description, and can be customized for different tasks.*
## Accessing the Assistants Page
To find the Assistants tab:
1. Open Jan and look at the left sidebar.
2. Click on the **Assistants** tab (see highlighted section in the screenshot above).
3. The main panel will display all your current assistants
3. The main panel will display all your current assistants.
## Managing Assistants
- **Add a New Assistant**: Click the `+` button in the Assistants panel to create a new assistant profile.
- **Add a New Assistant**: Click the `+` button in the Assistants panel to create a new assistant with your instructions.
- **Edit an Assistant**: Click the pencil (✏️) icon on any assistant card to update its name, description, or instructions.
- **Delete an Assistant**: Click the trash (🗑️) icon to remove an assistant you no longer need.
@ -44,7 +46,7 @@ Jan allows you to manage multiple Assistants, each with its own configuration pr
Each assistant can have its own set of instructions to guide its behavior. For example:
```
Act as a software development mentor focused on Python and JavaScript.
Act as a software engineering mentor focused on Python and JavaScript.
Provide detailed explanations with code examples when relevant.
Use markdown formatting for code blocks.
```
@ -56,23 +58,30 @@ Respond in a casual, friendly tone. Keep explanations brief and use simple langu
Provide examples when explaining complex topics.
```
Or:
```
Act as a travel planner. Suggest realistic itineraries and budgets, and keep recommendations concise.
Ask follow-up questions when the destination or dates are unclear.
```
## Best Practices
- Be clear and specific about the desired behavior for each assistant.
- Include preferences for formatting, tone, or style.
- Include examples to increase the model's compliance with your request.
- Use different assistants for different tasks (e.g., translation, travel planning, financial advice).
---
*Note: The ability to create, edit, and delete assistants is available in the Assistants tab. Each assistant can be tailored for a specific use case, making Jan a flexible and powerful tool for your needs.*
## Switching and Managing Assistants in Chat
You can quickly switch between assistants, or create and edit them, directly from the Chat screen using the assistant dropdown menu at the top:
You can quickly switch between assistants, or create and edit them, directly from the Chat screen using the
assistant dropdown menu at the top:
![Assistant Dropdown](./_assets/assistant-dropdown.png)
- Click the assistant name (e.g., "Travel Planner") at the top of the Chat screen to open the dropdown menu.
- The dropdown lists all your assistants. Click any assistant to switch to it for the current chat session.
- Click the assistant's name (e.g., "Travel Planner") at the top of the Chat screen to open the dropdown menu.
- The dropdown lists all of your assistants. Click on any of the assistants available to switch to it for the
current chat session.
- To create a new assistant, select **Create Assistant** at the bottom of the dropdown. This opens the Add Assistant dialog:
![Add Assistant Dialog](./_assets/assistant-add-dialog.png)
@ -82,10 +91,11 @@ You can quickly switch between assistants, or create and edit them, directly fro
![Edit Assistant Dialog](./_assets/assistant-edit-dialog.png)
### Add/Edit Assistant Dialogs
- Set an emoji and name for your assistant.
- Set an (optional) emoji and name for your assistant.
- Optionally add a description.
- Enter detailed instructions to guide the assistant's behavior.
- Adjust predefined parameters (like Temperature, Top P, etc.) or add custom parameters as needed.
- Adjust the predefined parameters (like Temperature, Top P, etc.) or add custom parameters as needed.
- Click **Save** to apply your changes.
This workflow allows you to seamlessly manage and switch between assistants while chatting, making it easy to tailor Jan to your needs in real time.
This workflow allows you to seamlessly manage and switch between assistants while chatting, making it easy to tailor
Jan to your needs in real time.

View File

@ -1,301 +0,0 @@
---
title: Configure Extensions
description: Learn about Jan's default extensions and explore how to configure them.
keywords:
  [
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Jan Extensions,
Extensions,
]
---
# Configure Extensions
Extensions in Jan are configured through a JSON file that defines their behavior and metadata. While most users won't need to modify this file directly, advanced users can customize extension settings for specific needs.
## Access extensions.json
1. Navigate to [Jan Data Folder](/docs/data-folder):
2. Open the `extensions.json` file in any text editor
## Configuration Options
| Option | Description | Example |
|--------|-------------|---------|
| `_active` | Enable/disable the extension | `true` or `false` |
| `listeners` | Event listeners configuration | `{}` for default |
| `origin` | Installation path of the extension | `"C:\\Users\\...\\jan\\resources\\app.asar.unpacked\\..."` |
| `installOptions` | Installation configuration | `{"version": false, "fullMetadata": true}` |
| `name` | Internal extension identifier | `"@janhq/conversational-extension"` |
| `productName` | Display name shown in UI | `"Conversational"` |
| `version` | Extension version number | `"1.0.0"` |
| `main` | Entry point file path | `"dist/index.js"` |
| `description` | Extension description | `"This extension enables conversations..."` |
| `url` | Extension repository URL | `"extension://@janhq/conversational-extension/dist/index.js"` |
## Full Example Configuration
```json title="~/jan/data/extensions/extensions.json"
{
"@janhq/conversational-extension": {
"_active": true,
"listeners": {},
"origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-conversational-extension-1.0.0.tgz",
"installOptions": {
"version": false,
"fullMetadata": true
},
"name": "@janhq/conversational-extension",
"productName": "Conversational",
"version": "1.0.0",
"main": "dist/index.js",
"description": "This extension enables conversations and state persistence via your filesystem.",
"url": "extension://@janhq/conversational-extension/dist/index.js"
},
"@janhq/inference-anthropic-extension": {
"_active": true,
"listeners": {},
"origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-anthropic-extension-1.0.2.tgz",
"installOptions": {
"version": false,
"fullMetadata": true
},
"name": "@janhq/inference-anthropic-extension",
"productName": "Anthropic Inference Engine",
"version": "1.0.2",
"main": "dist/index.js",
"description": "This extension enables Anthropic chat completion API calls.",
"url": "extension://@janhq/inference-anthropic-extension/dist/index.js"
},
"@janhq/inference-triton-trt-llm-extension": {
"_active": true,
"listeners": {},
"origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-triton-trt-llm-extension-1.0.0.tgz",
"installOptions": {
"version": false,
"fullMetadata": true
},
"name": "@janhq/inference-triton-trt-llm-extension",
"productName": "Triton-TRT-LLM Inference Engine",
"version": "1.0.0",
"main": "dist/index.js",
"description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option.",
"url": "extension://@janhq/inference-triton-trt-llm-extension/dist/index.js"
},
"@janhq/inference-mistral-extension": {
"_active": true,
"listeners": {},
"origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-mistral-extension-1.0.1.tgz",
"installOptions": {
"version": false,
"fullMetadata": true
},
"name": "@janhq/inference-mistral-extension",
"productName": "MistralAI Inference Engine",
"version": "1.0.1",
"main": "dist/index.js",
"description": "This extension enables Mistral chat completion API calls.",
"url": "extension://@janhq/inference-mistral-extension/dist/index.js"
},
"@janhq/inference-martian-extension": {
"_active": true,
"listeners": {},
"origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-martian-extension-1.0.1.tgz",
"installOptions": {
"version": false,
"fullMetadata": true
},
"name": "@janhq/inference-martian-extension",
"productName": "Martian Inference Engine",
"version": "1.0.1",
"main": "dist/index.js",
"description": "This extension enables Martian chat completion API calls.",
"url": "extension://@janhq/inference-martian-extension/dist/index.js"
},
"@janhq/inference-openrouter-extension": {
"_active": true,
"listeners": {},
"origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-openrouter-extension-1.0.0.tgz",
"installOptions": {
"version": false,
"fullMetadata": true
},
"name": "@janhq/inference-openrouter-extension",
"productName": "OpenRouter Inference Engine",
"version": "1.0.0",
"main": "dist/index.js",
"description": "This extension enables Open Router chat completion API calls.",
"url": "extension://@janhq/inference-openrouter-extension/dist/index.js"
},
"@janhq/inference-nvidia-extension": {
"_active": true,
"listeners": {},
"origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-nvidia-extension-1.0.1.tgz",
"installOptions": {
"version": false,
"fullMetadata": true
},
"name": "@janhq/inference-nvidia-extension",
"productName": "NVIDIA NIM Inference Engine",
"version": "1.0.1",
"main": "dist/index.js",
"description": "This extension enables NVIDIA chat completion API calls.",
"url": "extension://@janhq/inference-nvidia-extension/dist/index.js"
},
"@janhq/inference-groq-extension": {
"_active": true,
"listeners": {},
"origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-groq-extension-1.0.1.tgz",
"installOptions": {
"version": false,
"fullMetadata": true
},
"name": "@janhq/inference-groq-extension",
"productName": "Groq Inference Engine",
"version": "1.0.1",
"main": "dist/index.js",
"description": "This extension enables fast Groq chat completion API calls.",
"url": "extension://@janhq/inference-groq-extension/dist/index.js"
},
"@janhq/inference-openai-extension": {
"_active": true,
"listeners": {},
"origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-openai-extension-1.0.2.tgz",
"installOptions": {
"version": false,
"fullMetadata": true
},
"name": "@janhq/inference-openai-extension",
"productName": "OpenAI Inference Engine",
"version": "1.0.2",
"main": "dist/index.js",
"description": "This extension enables OpenAI chat completion API calls.",
"url": "extension://@janhq/inference-openai-extension/dist/index.js"
},
"@janhq/inference-cohere-extension": {
"_active": true,
"listeners": {},
"origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-cohere-extension-1.0.0.tgz",
"installOptions": {
"version": false,
"fullMetadata": true
},
"name": "@janhq/inference-cohere-extension",
"productName": "Cohere Inference Engine",
"version": "1.0.0",
"main": "dist/index.js",
"description": "This extension enables Cohere chat completion API calls.",
"url": "extension://@janhq/inference-cohere-extension/dist/index.js"
},
"@janhq/model-extension": {
"_active": true,
"listeners": {},
"origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-model-extension-1.0.33.tgz",
"installOptions": {
"version": false,
"fullMetadata": true
},
"name": "@janhq/model-extension",
"productName": "Model Management",
"version": "1.0.33",
"main": "dist/index.js",
"description": "Model Management Extension provides model exploration and seamless downloads.",
"url": "extension://@janhq/model-extension/dist/index.js"
},
"@janhq/monitoring-extension": {
"_active": true,
"listeners": {},
"origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-monitoring-extension-1.0.10.tgz",
"installOptions": {
"version": false,
"fullMetadata": true
},
"name": "@janhq/monitoring-extension",
"productName": "System Monitoring",
"version": "1.0.10",
"main": "dist/index.js",
"description": "This extension provides system health and OS level data.",
"url": "extension://@janhq/monitoring-extension/dist/index.js"
},
"@janhq/assistant-extension": {
"_active": true,
"listeners": {},
"origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-assistant-extension-1.0.1.tgz",
"installOptions": {
"version": false,
"fullMetadata": true
},
"name": "@janhq/assistant-extension",
"productName": "Jan Assistant",
"version": "1.0.1",
"main": "dist/index.js",
"description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models.",
"url": "extension://@janhq/assistant-extension/dist/index.js"
},
"@janhq/tensorrt-llm-extension": {
"_active": true,
"listeners": {},
"origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-tensorrt-llm-extension-0.0.3.tgz",
"installOptions": {
"version": false,
"fullMetadata": true
},
"name": "@janhq/tensorrt-llm-extension",
"productName": "TensorRT-LLM Inference Engine",
"version": "0.0.3",
"main": "dist/index.js",
"description": "This extension enables Nvidia's TensorRT-LLM for the fastest GPU acceleration. See the [setup guide](https://jan.ai/guides/providers/tensorrt-llm/) for next steps.",
"url": "extension://@janhq/tensorrt-llm-extension/dist/index.js"
},
"@janhq/inference-cortex-extension": {
"_active": true,
"listeners": {},
"origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-cortex-extension-1.0.15.tgz",
"installOptions": {
"version": false,
"fullMetadata": true
},
"name": "@janhq/inference-cortex-extension",
"productName": "Cortex Inference Engine",
"version": "1.0.15",
"main": "dist/index.js",
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"url": "extension://@janhq/inference-cortex-extension/dist/index.js"
}
}
```
## Common Use Cases
1. **Disable an Extension**
```json
{
"@janhq/example-extension": {
"_active": false
// other settings...
}
}
```
2. **Update Entry Point**
```json
{
"@janhq/example-extension": {
"main": "new/path/index.js"
// other settings...
}
}
```
## Important Notes
- Always backup the `extensions.json` file before making changes
- Invalid JSON syntax can prevent Jan from starting properly
- Most users should manage extensions through Jan's Settings UI
- Changes require restarting Jan to take effect

View File

@ -1,6 +1,6 @@
---
title: Install Engines
description: Learn about Jan's default extensions and explore how to configure them.
title: Adding New Providers
description: Learn about how to add new providers to Jan.
[
Jan,
Customizable Intelligence, LLM,
@ -19,19 +19,104 @@ description: Learn about Jan's default extensions and explore how to configure t
import { Callout } from 'nextra/components'
import { Settings, EllipsisVertical } from 'lucide-react'
# Install Engines
# Adding New Providers
Any local or remote model provider that exposes an OpenAI-compatible API can be used with Jan, and both kinds
are added in the same way.
## Local
Jan itself only runs GGUF models locally, but you can serve models in other formats through an OpenAI-compatible
server such as [vLLM](https://vllm.ai/) or [SGLang](https://github.com/sgl-project/sglang) and connect it to Jan as a provider.
### vLLM
First, make sure you have [uv](https://docs.astral.sh/uv/) installed.
Next, create a virtual environment to install vLLM in it.
```sh
uv venv .venv --python 3.12
```
Activate your environment.
```sh
source .venv/bin/activate
```
Install `vllm` in it.
```sh
uv pip install vllm
```
Next, start a server and download a model at the same time.
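For example, the command below starts vLLM's OpenAI-compatible server on port 8000 and downloads the model on first run. The model name is only an illustration; recent vLLM releases provide the `vllm serve` entrypoint, while older ones use `python -m vllm.entrypoints.openai.api_server`.
```bash
# Serve a model through vLLM's OpenAI-compatible API (default port 8000)
vllm serve Qwen/Qwen2.5-1.5B-Instruct --port 8000
```
Once it is running, add it to Jan as a provider with the base URL `http://localhost:8000/v1`.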
### SGLang
Create a virtual environment to install SGLang into.
```sh
uv venv .venv --python 3.12
```
Activate your environment.
```sh
source .venv/bin/activate
```
Install `sglang` in it.
```sh
uv pip install "sglang[all]"
```
Next, start a server and download a model at the same time.
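As a sketch (again, the model name is only an example), SGLang's launch script exposes an OpenAI-compatible API as well:
```bash
# Launch SGLang's OpenAI-compatible server (default port 30000)
python -m sglang.launch_server --model-path Qwen/Qwen2.5-1.5B-Instruct --port 30000
```
Then point Jan at `http://localhost:30000/v1` when adding the provider.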
## Install Local Engines
Jan currently doesn't support installing a local engine yet.
## Install Remote Engines
### Step-by-step Guide
You can add any OpenAI API-compatible providers like OpenAI, Anthropic, or others.
To add a new remote engine:
You can add any OpenAI API-compatible providers like Together AI, Fireworks AI, and others. Let's walk through
some examples.
### Together AI
Create an account or log in to your [Together AI](https://together.ai) dashboard.
![Together's Dashboard](./_assets/together.png)
Click on **View all models**.
Pick a free model like `deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free`.
Where it says **Run Inference**, click on `curl` and grab:
- `url`: `https://api.together.xyz/v1/chat/completions`
- `model`: `deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free`
Get your API key.
Go back to Jan and open **Settings > Model Providers** and click on **Add Provider**.
Name it Together and proceed.
Add your API key and change the URL.
Enter the model ID by clicking the `+` sign under **Models**.
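Before wiring it into Jan, you can sanity-check the key and model with the same `curl` request Together shows (a sketch; `$TOGETHER_API_KEY` is a placeholder for your key):
```bash
curl https://api.together.xyz/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $TOGETHER_API_KEY" \
  -d '{
        "model": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free",
        "messages": [{"role": "user", "content": "Say hello"}]
      }'
```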
1. Navigate to **Settings** (<Settings width={16} height={16} style={{display:"inline"}}/>) > **Engines**
1. At **Remote Engine** category, click **+ Install Engine**
1. At **Remote Engine** category, click **+ Install Engine**
<br/>
![Install Remote Engines](./_assets/install-engines-01.png)
@ -57,7 +142,7 @@ To add a new remote engine:
> - The conversion functions are only needed for providers that don't follow the OpenAI API format. For OpenAI-compatible APIs, you can leave these empty.
> - For OpenAI-compatible APIs like OpenAI, Anthropic, or Groq, you only need to fill in the required fields. Leave optional fields empty.
4. Click **Install**
4. Click **Install**
5. Once completed, you should see your engine in **Engines** page:
- You can rename or uninstall your engine
- You can navigate to its own settings page
@ -78,7 +163,7 @@ Here's how to set up OpenAI as a remote engine:
#### Custom APIs Setup
If you're integrating an API that doesn't follow OpenAI's format, you'll need to use the conversion functions.
If you're integrating an API that doesn't follow OpenAI's format, you'll need to use the conversion functions.
Let's say you have a custom API with this format:
```javascript
@ -107,53 +192,51 @@ Transform response template: your transform response template here
```
1. Header template
```
```json
"Authorization: Bearer {{api_key}}"
```
2. Transform request template:
Convert from Jan's OpenAI-style format to your API's format
```
```json
"chat_completions": {
"url": "https://api.custom_endpoint.com/v1/messages",
"template": "{
{% for key, value in input_request %}
{% if key == "messages" %}
{% if key == "messages" %}
"prompt": "{{ last(input_request.messages).content }}"
{% else if key == "max_tokens" or key == "temperature" %}
"{{ key }}": {{ tojson(value) }}
"{{ key }}": {{ tojson(value) }}
{% endif %}
{% endfor %}
}"
}
```
3. Transform response template
Convert from your API's format back to OpenAI-style format
```
```json
"chat_completions": {
"template": "{
{
"choices": [{
"message": {
"role": "assistant",
"content": "{{ input_request.generated_text }}"
"template": {
"choices": [{
"message": {
"role": "assistant",
"content": "{{ input_request.generated_text }}"
}
}],
"usage": {
"total_tokens": "{{ input_request.tokens_used }}"
}
}],
"usage": {
"total_tokens": {{ input_request.tokens_used }}
}
}
}"
}
}
```
**Expected Formats:**
1. Jan's Request Format
```
```json
{
"messages": [
{"role": "user", "content": "What is AI?"}
@ -164,7 +247,8 @@ Convert from your API's format back to OpenAI-style format
```
2. Jan's Expected Response Format
```
```json
{
"choices": [{
"message": {
@ -177,17 +261,3 @@ Convert from your API's format back to OpenAI-style format
}
}
```
<Callout type="warning">
Make sure to test your conversion functions thoroughly. Incorrect conversions may cause errors or unexpected behavior.
</Callout>

View File

@ -32,9 +32,11 @@ Jan stores your data locally in JSON format. Your data is yours alone.
Via Jan:
1. Settings (<Settings width={16} height={16} style={{display:"inline"}}/>) > Advanced Settings
2. Click <FolderOpen width={16} height={16} style={{display:"inline"}}/>
<br/>
![Open Jan Data Folder](./_assets/settings-11.png)
<br/>
Via Terminal:
```bash

View File

@ -1,35 +0,0 @@
---
title: Installation
description: Jan is a ChatGPT-alternative that runs on your computer, with a local API server.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Hardware Setup,
GPU,
]
---
import { Cards, Card } from 'nextra/components'
import childPages from './desktop/_meta.json';
# Installation
<br/>
<Cards
children={Object.keys(childPages).map((key, i) => (
<Card
key={i}
title={childPages[key].title}
href={childPages[key].href}
/>
))}
/>

View File

@ -1,101 +0,0 @@
---
title: Error Codes
description: Learn Jan application's error codes and how to solve them.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
troubleshooting,
error codes,
broken build,
something amiss,
unexpected token,
undefined issue,
permission denied,
]
---
import { Tabs } from 'nextra/components'
import { Callout } from 'nextra/components'
# Error Codes
This article summarizes error codes in the Jan application, categorized by application feature. Each error includes a unique code and a **Prefix** indicating its category.
The following sections detail the error categories, their descriptions, and the error types.
<Callout type="info">
See the Solution column for error troubleshooting.
</Callout>
## Error Code Categories
### 1. Installation Errors
These errors relate to issues encountered during installation, including desktop and server setups.
- **Prefix**: INST
### 2. Hardware Setup Errors
These errors relate to configuring and setting up hardware components.
- **Prefix**: HW
#### Error Types
| Error Code | Cause | Solution |
|------------|-----------------------------------------|-------------------------------------------------------------------------------------------|
| HW-1 | The CUDA toolkit may be unavailable. | [Troubleshooting Nvidia GPU](/docs/troubleshooting#1-ensure-gpu-mode-requirements) |
| HW-2 | Problem with Nvidia drivers. | [Troubleshooting Nvidia GPU](/docs/troubleshooting#troubleshooting-nvidia-gpu) |
### 3. Architecture Errors
These errors relate to problems with the overall system architecture and configuration setups.
- **Prefix**: ARCH
### 4. Basic Usage Errors
These errors occur during the basic usage of the application, including issues with models, assistants, and tools.
- **Prefix**: USG
#### Error Types
| Error Code | Cause | Solution |
|------------|-----------------------------------|--------------------------------------|
| USG-1 | Model is currently unavailable. | [How to Use Model](/docs/models) |
### 5. Advanced Settings Errors
These errors relate to advanced settings and configurations within the application.
- **Prefix**: ADV
### 6. Inference Engine Errors
These errors relate to inference engines, both local and remote.
- **Prefix**: IE
#### Error Types
| Error Code | Cause | Solution |
|------------|---------------------------------------------|---------------------------------------------------------------------------------------------------------------|
| IE-2 | The OpenAI model's API key is invalid. | [How to Integrate OpenAI API](/docs/remote-inference/openai#how-to-integrate-openai-api-with-jan) |
| IE-3 | The Groq model's API key is invalid. | [How to Integrate Groq API](/docs/remote-inference/groq#how-to-integrate-groq-api-with-jan) |
| IE-4 | The Mistral model's API key is invalid. | [How to Integrate Mistral API](/docs/remote-inference/mistralai) |
| IE-5 | The OpenRouter model's API key is invalid. | [How to Integrate OpenRouter API](/docs/remote-inference/openrouter) |
### 7. Local API Server Errors
These errors relate to the local API server's functionality.
- **Prefix**: API
#### Error Types
| Error Code | Cause | Solution |
|------------|----------------------------------------|--------------------------------------------------------------|
| API-1 | Port 39291 is currently unavailable. | [Local API Server Guide](/docs/local-api#step-1-set-the-local-server) |
### 8. Extensions and Integration Errors
These errors relate to integrating the application with external systems or extensions.
- **Prefix**: EXT
### 9. Troubleshooting Errors
These errors occur during the troubleshooting processes and procedures.
- **Prefix**: TRO
### 10. Unclear Errors
These errors don't classify into the standard categories above, making their causes difficult to identify.
#### Error Codes
| Error Title | Solution |
|-------------------|------------------|
| Somethings Amiss | [Troubleshooting Somethings Amiss](/docs/troubleshooting#somethings-amiss) |
| Undefined Issue | [Troubleshooting Undefined Issue](/docs/troubleshooting#undefined-issue) |

View File

@ -1,11 +0,0 @@
{
"model-management": {
"title": "Model Management",
"href": "/docs/extensions-settings/model-management"
},
"system-monitoring": {
"title": "System Monitoring",
"href": "/docs/extensions-settings/system-monitoring"
}
}

View File

@ -1,42 +0,0 @@
---
title: Model Management
description: Learn about Jan's default extensions and explore how to configure them.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Jan Extensions,
Extensions,
]
---
import { Callout } from 'nextra/components'
import { Settings, EllipsisVertical, Plus, FolderOpen, Pencil } from 'lucide-react'
# Model Management
This extension configures how Jan handles model downloads and management:
- Model discovery and browsing
- Version control & configuration handling
- Download management
## Hugging Face Access Token
Access tokens authenticate your identity to Hugging Face Hub for model downloads.
1. Get your token from [Hugging Face Tokens](https://huggingface.co/docs/hub/en/security-tokens)
2. Enter your token in **Settings > Model Providers > Llama.cpp > Hugging Face Access Token**
<Callout type="warning">
Keep your access tokens secure and never share them.
</Callout>
<br/>
![Model Management](../_assets/extensions-02.png)
<br/>

View File

@ -1,54 +0,0 @@
---
title: System Monitoring
description: Learn about Jan's default extensions and explore how to configure them.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Jan Extensions,
Extensions,
]
---
import { Callout } from 'nextra/components'
import { Settings, EllipsisVertical } from 'lucide-react'
# System Monitoring
Provides system health and OS level data:
- Hardware utilization tracking
- Performance monitoring
- Error logging
You can configure your logs in Jan in **Settings** (<Settings width={16} height={16} style={{display:"inline"}}/>) > **Core Extensions** > **System Monitoring**:
## Enable App Logs
Jan can save logs locally on your computer for:
- Debugging model issues
- Crash reports
- Download troubleshooting
To enable, toggle on **Enable App Logs**:
<br/>
![Model Management](../_assets/extensions-03.png)
<br/>
## Log Cleaning Interval
Set automatic log deletion interval in milliseconds:
- Default: 120000 (2 minutes); there are no minimum or maximum intervals
- Controls disk space usage
- Prevents log accumulation
<Callout type='info'>
You can clear Jan logs manually with [Clear logs](/docs/settings#log-management) in **Privacy**.
</Callout>

View File

@ -1,75 +0,0 @@
---
title: Extensions Overview
description: Learn about Jan's default extensions and explore how to configure them.
keywords:
  [
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Jan Extensions,
Extensions,
]
---
import { Callout } from 'nextra/components'
import { Settings, EllipsisVertical } from 'lucide-react'
# Extensions
## Overview
Extensions are modular components that add functionality to Jan. Each extension is designed to handle specific features.
Extensions can be managed through **Settings** (<Settings width={16} height={16} style={{display:"inline"}}/>) > **Extensions**:
<br/>
![Remove Model](./_assets/extensions-01.png)
<br/>
## Core Extensions
### Cortex
The primary extension that manages both **local** and **remote engines** capabilities:
#### Local Engines
[llama.cpp](/docs/local-engines/llama-cpp): Fast, efficient local inference engine that runs GGUF models directly on your device. Powers Jan's default local AI capabilities with support for multiple hardware configurations.
#### Remote Engines
- [Anthropic](/docs/remote-models/anthropic): Access Claude models
- [Cohere](/docs/remote-models/cohere): Access Cohere's models
- [Groq](/docs/remote-models/groq): High-performance inference
- [Martian](/docs/remote-models/martian): Specialized model access
- [MistralAI](/docs/remote-models/mistralai): Access Mistral models
- [NVIDIA NIM](/docs/remote-models/nvidia-nim) (NVIDIA Inference Microservices): Platform for deploying and serving GPU-accelerated AI models, providing enterprise-grade reliability and scalability.
- [OpenAI](/docs/remote-models/openai): Access GPT models
- [OpenRouter](/docs/remote-models/openrouter): Multi-provider model access
- [Triton-TRT-LLM](/docs/remote-models/triton): High-performance inference backend using NVIDIA Triton Inference Server with TensorRT-LLM optimization, designed for large-scale model deployment.
### Jan Assistant
Enables assistants functionality, including Jan - the default assistant that can utilize all downloaded models. This extension manages:
- Default assistant configurations
- Model selection
- Conversation management
### Conversational
Manages all chat-related functionality and data persistence:
### [Model Management](/docs/extensions-settings/model-management)
Provides model exploration and seamless downloads:
- Model discovery and browsing
- Version control & configuration handling
- Download management
### [System Monitoring](/docs/extensions-settings/system-monitoring)
Provides system health and OS level data:
- Hardware utilization tracking
- Performance monitoring
- Error logging

View File

@ -1,6 +1,6 @@
---
title: Jan
description: Jan is an open-source, self-hosted alternative to OpenAI's platform - build and run AI on your own desktop or server.
description: Jan is an open-source AI assistant and self-hosted AI platform - build and run AI on your own desktop or server.
keywords:
[
Jan,
@ -25,37 +25,35 @@ import FAQBox from '@/components/FaqBox'
![Jan's Cover Image](./_assets/jan-app.png)
Jan is an AI chat application that runs 100% offline on your desktop & mobile (*coming soon*). Our goal is to
Jan is an AI chat application that runs 100% offline on your desktop and (*soon*) on mobile. Our goal is to
make it easy for anyone, with or without coding skills, to download and use AI models with full control and
[privacy](https://www.reuters.com/legal/legalindustry/privacy-paradox-with-ai-2023-10-31/).
Jan is powered by [Cortex](https://cortex.so/), our embeddable local AI engine which provides an OpenAI-compatible
API that can run in the background at `https://localhost:1337` (or a custom port). This enables you to power other
applications running locally with AI capabilities. For example, you can connect tools like [Continue.dev](https://jan.ai/integrations/coding/vscode)
and [Cline](https://cline.bot/), or any OpenAI-compatible app, to Jan and start coding on their supported editors using
models hosted in Jan.
Jan is powered by [Llama.cpp](https://github.com/ggerganov/llama.cpp), a local AI engine that provides an OpenAI-compatible
API that can run in the background by default at `https://localhost:1337` (or your custom port). This enables you to power all sorts of
applications with AI capabilities from your laptop/PC. For example, you can connect local tools like [Continue](https://jan.ai/integrations/coding/vscode)
and [Cline](https://cline.bot/) to Jan and power them using your favorite models.
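For example, any OpenAI-compatible client or a plain HTTP request can reach that server. A minimal TypeScript sketch (the address follows the default noted above — adjust the scheme or port to match your settings — and the model id is an assumption; substitute whichever model you have downloaded):

```typescript
// Minimal sketch: calling Jan's OpenAI-compatible local server from another app.
// Assumptions: the local API server is enabled at the default address documented
// above, and the model id below matches a model already downloaded in Jan.
const baseUrl = 'https://localhost:1337'   // some setups use http://127.0.0.1:1337
const model = 'llama3.2-3b-instruct'       // replace with your model's id

async function ask(prompt: string): Promise<string> {
  const response = await fetch(`${baseUrl}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model,
      messages: [{ role: 'user', content: prompt }],
    }),
  })
  const data = await response.json()
  return data.choices[0].message.content
}

ask('Hello from my own app!').then(console.log)
```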
Jan doesn't limit you to locally hosted models, meaning, you can create an API key from your favorite model provider
and add it to Jan via the configuration's page and start talking to your favorite paid models.
Jan doesn't limit you to locally hosted models: you can create an API key from your favorite model provider,
add it to Jan via the configuration page, and start talking to your favorite models.
### Features
- Download popular open-source LLMs (Llama3, Gemma3, Mistral, and more) from the HuggingFace [Model Hub](./docs/models/manage-models.mdx)
or import any GGUF models available locally
- Connect to [cloud model services](/docs/remote-models/openai) (OpenAI, Anthropic, Mistral, Groq, etc.)
- Download popular open-source LLMs (Llama3, Gemma3, Qwen3, and more) from the HuggingFace [Model Hub](./docs/models/manage-models.mdx)
or import any GGUF files (the model format used by llama.cpp) available locally
- Connect to [cloud services](/docs/remote-models/openai) (OpenAI, Anthropic, Mistral, Groq, etc.)
- [Chat](./docs/threads.mdx) with AI models & [customize their parameters](./docs/models/model-parameters.mdx) via our
intuitive interface
- Use our [local API server](https://jan.ai/api-reference) with an OpenAI-equivalent API
- Customize Jan with [extensions](/docs/extensions)
- Use our [local API server](https://jan.ai/api-reference) with an OpenAI-equivalent API to power other apps.
### Philosophy
Jan is built to be [user-owned](about#-user-owned), which means that Jan is:
- Truly open source via the [AGPLv3 license](https://github.com/menloresearch/jan/blob/dev/LICENSE)
- Truly open source via the [Apache 2.0 license](https://github.com/menloresearch/jan/blob/dev/LICENSE)
- [Data is stored locally, following one of the many local-first principles](https://www.inkandswitch.com/local-first)
- Runs 100% offline, with privacy by default
- Internet is optional, Jan can run 100% offline
- Free choice of AI models, both local and cloud-based
- We do not collect or sell user data. See our [Privacy](/privacy).
- We do not collect or sell user data. See our [Privacy Policy](./privacy).
<Callout>
You can read more about our [philosophy](/about#philosophy) here.
@@ -67,17 +65,15 @@ Jan is inspired by the concepts of [Calm Computing](https://en.wikipedia.org/wik
## Acknowledgements
Jan is built on the shoulders of many upstream open-source projects:
Jan is built on the shoulders of many open-source projects, such as:
- [Llama.cpp](https://github.com/ggerganov/llama.cpp/blob/master/LICENSE)
- [LangChain.js](https://github.com/langchain-ai/langchainjs/blob/main/LICENSE)
- [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM/blob/main/LICENSE)
- [Scalar](https://github.com/scalar/scalar)
## FAQs
<FAQBox title="What is Jan?">
Jan is a customizable AI assistant that runs offline on your computer - a privacy-focused alternative to tools like
Jan is a customizable AI assistant that can run offline on your computer - a privacy-focused alternative to tools like
ChatGPT, Anthropic's Claude, and Google Gemini, with optional cloud AI support.
</FAQBox>
@@ -87,8 +83,10 @@ Jan is built on the shoulders of many upstream open-source projects:
</FAQBox>
<FAQBox title="Is Jan compatible with my system?">
Jan supports all major operating systems, [Mac](/docs/desktop/mac#compatibility), [Windows](/docs/desktop/windows#compatibility),
and [Linux](docs/desktop/linux).
Jan supports all major operating systems:
- [Mac](/docs/desktop/mac#compatibility)
- [Windows](/docs/desktop/windows#compatibility)
- [Linux](/docs/desktop/linux)
Hardware compatibility includes:
- NVIDIA GPUs (CUDA)
@@ -109,11 +107,11 @@ Jan is built on the shoulders of many upstream open-source projects:
</Callout>
You can optionally share anonymous usage statistics to help improve Jan, but your conversations are never
shared. See our complete [Privacy Policy](./docs/privacy.mdx).
shared. See our complete [Privacy Policy](./docs/privacy).
</FAQBox>
<FAQBox title="What models can I use with Jan?">
- Download optimized models from [Jan Hub](/docs/models/manage-models#1-download-from-jan-hub-recommended)
- Download optimized models from the [Jan Hub](/docs/models/manage-models#1-download-from-jan-hub-recommended)
- Import GGUF models from Hugging Face or your local files
- Connect to cloud providers like OpenAI, Anthropic, Mistral and Groq (requires your own API keys)
</FAQBox>
@@ -127,21 +125,16 @@ Jan is built on the shoulders of many upstream open-source projects:
Yes! Once you've downloaded a local model, Jan works completely offline with no internet connection needed.
</FAQBox>
<FAQBox title="How can I customize or extend Jan?">
Jan has an extensible architecture similar to VSCode and Obsidian. You can build custom features using our
[extensions API](/docs/extensions), which powers many of Jan's core features.
</FAQBox>
<FAQBox title="How can I contribute or get community help?">
- Join our [Discord community](https://discord.gg/qSwXFx6Krr) to connect with other users
- Contribute through [GitHub](https://github.com/menloresearch/jan) (no permission needed!)
- Get troubleshooting help in our [Discord](https://discord.com/invite/FTk2MvZwJH) [#🆘|jan-help](https://discord.com/channels/1107178041848909847/1192090449725358130) channel
- Check our [Troubleshooting](./docs/troubleshooting.mdx) guide for common issues
- Get troubleshooting help in our [Discord](https://discord.com/invite/FTk2MvZwJH) channel [#🆘|jan-help](https://discord.com/channels/1107178041848909847/1192090449725358130)
- Check our [Troubleshooting](./docs/troubleshooting) guide for common issues
</FAQBox>
<FAQBox title="Can I self-host Jan?">
Yes! We fully support the self-hosted movement. Either [download Jan](./download.mdx) directly or fork and build
from our [GitHub repository](https://github.com/menloresearch/jan).
Yes! We fully support the self-hosted movement. Either download Jan directly or fork our
[GitHub repository](https://github.com/menloresearch/jan) and build it from source.
</FAQBox>
<FAQBox title="What does Jan stand for?">

View File

@@ -1,143 +0,0 @@
---
title: Install Extensions
description: A step-by-step guide on installing an extension.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Jan Extensions,
Extensions,
]
---
import { Callout } from 'nextra/components'
import { Settings, Plus } from 'lucide-react'
# Install Extensions
Jan uses a modular extension system that allows developers to add new functionality to the application. By default, Jan comes with several [pre-installed extensions](/docs/extensions#core-extensions) that provide core functionalities. You can manually add custom third-party extensions at your own risk.
## Prerequisites
### Required Tools
- Node.js (version 20.0.0 or higher)
- Basic understanding of TypeScript
### Jan's Architecture
Jan is built on:
- **Electron**: Powers the desktop application
- Extensions run in the main process
- Access to Node.js APIs and filesystem
- Cannot use browser-only libraries
- **Next.js**: Handles the user interface
- **Node.js**: Runs extension logic
This architecture means your extensions:
- Can use Node.js packages and system APIs (see the sketch after this list)
- Need to handle client/server communication properly
- Should follow Electron's security practices
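As a purely illustrative example of the first point: because extensions run in Electron's main process, Node.js built-ins such as `fs`, `path`, and `os` are available directly. The helper and config path below are hypothetical and not part of Jan's API; real extensions should resolve locations through whatever APIs Jan exposes:

```typescript
// Illustrative only: reading a config file with Node.js built-ins from an extension.
// The path and helper are hypothetical, not part of Jan's extension API.
import { promises as fs } from 'node:fs'
import * as os from 'node:os'
import * as path from 'node:path'

export async function readExtensionConfig(): Promise<Record<string, unknown>> {
  const configPath = path.join(os.homedir(), 'jan', 'data', 'my-extension.json')
  try {
    const raw = await fs.readFile(configPath, 'utf-8')
    return JSON.parse(raw)
  } catch {
    return {} // fall back to defaults when no config file exists yet
  }
}
```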
## Create Extensions
<Callout type="info">
Jan currently only accepts the `.tgz` file format for extensions.
</Callout>
> **Heads Up:**
> - Use the following structure and setup as a **reference** only.
> - You're free to develop extensions using any approach or structure that works for your needs. Feel free to experiment and innovate.
> - If you already have your own `.tgz` extension file, skip ahead to the [Install Extensions](/docs/install-extensions#install-extensions) step.
### Extension Structure
Your extension should follow this basic structure:
```
my-extension/
├── package.json # Extension metadata and dependencies
├── dist/ # Compiled JavaScript files
│ └── index.js # Main extension entry point
├── src/ # Source code
│ └── index.ts # TypeScript source
└── README.md # Extension documentation
```
### Required package.json Fields
```json
{
"name": "@your-org/extension-name",
"version": "1.0.0",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"jan": {
"type": "extension",
"displayName": "Your Extension Name",
"description": "Description of what your extension does"
},
"scripts": {
"build": "tsc",
"package": "npm pack"
},
"dependencies": {
// List your dependencies
},
"devDependencies": {
"typescript": "^5.0.0"
}
}
```
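To connect the pieces: the `main` field points at the compiled entry in `dist/`, which is built from `src/index.ts`. Below is a minimal, hypothetical sketch of that entry point — the actual base class and lifecycle hooks come from Jan's extension API, so treat the official template as the source of truth:

```typescript
// src/index.ts — illustrative only; see the extension template for the real base class.
// Assumption: Jan loads the compiled entry (dist/index.js) and calls lifecycle
// hooks when the extension is activated and deactivated.
export default class MyExtension {
  /** Called when Jan activates the extension. */
  async onLoad(): Promise<void> {
    console.log('MyExtension loaded')
    // Register settings, commands, or background tasks here.
  }

  /** Called when Jan deactivates the extension or the app shuts down. */
  async onUnload(): Promise<void> {
    console.log('MyExtension unloaded')
    // Clean up timers, watchers, or open connections here.
  }
}
```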
### Example Extension Template
You can find a template for creating Jan extensions in our [example repository](https://github.com/menloresearch/extension-template).
## Install Extensions
To install a custom extension in Jan:
1. Open Jan, navigate to **Settings** (<Settings width={16} height={16} style={{display:"inline"}}/>) > **Extensions**
2. Click **<Plus width={16} height={16} style={{display:"inline"}}/> Install Extension**
3. Select your extension `.tgz` file & save
4. Restart Jan
After restart, the `~/jan/data/extensions/extensions.json` file will be updated automatically to include your new extension.
<br/>
![Install Extensions](./_assets/extensions-04.png)
<br/>
## Troubleshooting
**Check Extension Logs**
```bash
# View the application log
tail -f ~/jan/logs/app.log
```
**Open Developer Tools**
- Mac: `Cmd + Option + I`
- Windows/Linux: `Ctrl + Shift + I`
**Common Error Patterns**
1. Initialization Failures
- Extension fails to load/activate
- Verify package.json configuration
- Check extension dependencies
2. Runtime Errors
- Node.js module errors
- API connection issues
- Authentication failures
3. Build Problems
```bash
# Remove stale build output, then rebuild from source
rm -rf dist/
npm run build
```

Some files were not shown because too many files have changed in this diff.