diff --git a/docs/src/components/Home/Hero/index.tsx b/docs/src/components/Home/Hero/index.tsx index 009681197..09ea04b5e 100644 --- a/docs/src/components/Home/Hero/index.tsx +++ b/docs/src/components/Home/Hero/index.tsx @@ -80,7 +80,7 @@ const Hero = () => {

- Jan is an open source ChatGPT-alternative that runs 100% offline. + Jan is an open source AI Assistant that runs 100% offline.

diff --git a/docs/src/pages/_meta.json b/docs/src/pages/_meta.json index c83c40ec6..bd3d96dc8 100644 --- a/docs/src/pages/_meta.json +++ b/docs/src/pages/_meta.json @@ -11,16 +11,6 @@ "type": "page", "title": "Documentation" }, - "cortex": { - "type": "page", - "title": "Cortex", - "display": "hidden" - }, - "integrations": { - "type": "page", - "title": "Integrations", - "display": "hidden" - }, "changelog": { "type": "page", "title": "Changelog", diff --git a/docs/src/pages/cortex/_assets/architecture.png b/docs/src/pages/cortex/_assets/architecture.png deleted file mode 100644 index a91f1a548..000000000 Binary files a/docs/src/pages/cortex/_assets/architecture.png and /dev/null differ diff --git a/docs/src/pages/cortex/_assets/cortex-cover.png b/docs/src/pages/cortex/_assets/cortex-cover.png deleted file mode 100644 index 9ef3816f8..000000000 Binary files a/docs/src/pages/cortex/_assets/cortex-cover.png and /dev/null differ diff --git a/docs/src/pages/cortex/_assets/cortex-llamacpp-act.png b/docs/src/pages/cortex/_assets/cortex-llamacpp-act.png deleted file mode 100644 index 5cc93b2a2..000000000 Binary files a/docs/src/pages/cortex/_assets/cortex-llamacpp-act.png and /dev/null differ diff --git a/docs/src/pages/cortex/_assets/cortex-llamacpp-arch.png b/docs/src/pages/cortex/_assets/cortex-llamacpp-arch.png deleted file mode 100644 index 6118d9126..000000000 Binary files a/docs/src/pages/cortex/_assets/cortex-llamacpp-arch.png and /dev/null differ diff --git a/docs/src/pages/cortex/_meta.json b/docs/src/pages/cortex/_meta.json deleted file mode 100644 index 74a870da5..000000000 --- a/docs/src/pages/cortex/_meta.json +++ /dev/null @@ -1,136 +0,0 @@ -{ - "-- Switcher": { - "type": "separator", - "title": "Switcher" - }, - "get-started": { - "title": "GET STARTED", - "type": "separator" - }, - "index": { - "title": "Overview", - "href": "/cortex" - }, - "quickstart": { - "title": "Quickstart" - }, - "hardware": { - "title": "Hardware" - }, - "installation": { - "title": "Installation" - }, - "basicusage": { - "title": "BASIC USAGE", - "type": "separator" - }, - "command-line": { - "title": "CLI" - }, - "ts-library": { - "title": "Typescript Library" - }, - "py-library": { - "title": "Python Library" - }, - "server": { - "title": "Server Endpoint" - }, - "capabilities": { - "title": "CAPABILITIES", - "type": "separator" - }, - "text-generation": { - "title": "Text Generation" - }, - "function-calling": { - "display": "hidden", - "title": "Function Calling" - }, - "embeddings": { - "display": "hidden", - "title": "Embeddings" - }, - "fine-tuning": { - "display": "hidden", - "title": "Fine-tuning" - }, - "vision": { - "display": "hidden", - "title": "Vision" - }, - "model-operations": { - "display": "hidden", - "title": "Model Operations" - }, - "rag": { - "display": "hidden", - "title": "RAG" - }, - "assistant": { - "display": "hidden", - "title": "ASSISTANTS", - "type": "separator" - }, - "assistants": { - "display": "hidden", - "title": "Overview" - }, - "commandline": { - "title": "COMMAND LINE", - "type": "separator" - }, - "cli": { - "title": "cortex" - }, - "training-engines": { - "display": "hidden", - "title": "TRAINING ENGINES" - }, - "extensions": { - "display": "hidden", - "title": "EXTENSIONS", - "type": "separator" - }, - "build-extension": { - "display": "hidden", - "title": "Build an Extension" - }, - "architectures": { - "title": "ARCHITECTURE", - "type": "separator" - }, - "architecture": { - "title": "Cortex" - }, - "cortex-cpp": { - "title": "Cortex.cpp" - }, - 
"cortex-llamacpp": { - "title": "Cortex.llamacpp" - }, - "cortex-tensorrt-llm": { - "title": "Cortex.tensorrt-llm", - "display": "hidden" - }, - "cortex-python": { - "title": "Cortex.python", - "display": "hidden" - }, - "cortex-openvino": { - "title": "Cortex.OpenVino", - "display": "hidden" - }, - "ext-architecture": { - "display": "hidden", - "title": "Extensions" - }, - "troubleshooting": { - "title": "TROUBLESHOOTING", - "type": "separator" - }, - "error-codes": { - "display": "hidden", - "title": "Error Codes" - } -} diff --git a/docs/src/pages/cortex/architecture.mdx b/docs/src/pages/cortex/architecture.mdx deleted file mode 100644 index 11959ccc3..000000000 --- a/docs/src/pages/cortex/architecture.mdx +++ /dev/null @@ -1,202 +0,0 @@ ---- -title: Overview -description: Cortex Architecture -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. - - -## Introduction - -Cortex is an alternative to the OpenAI API designed to operate entirely on your local hardware infrastructure. This headless backend platform is also engineered to support TensorRT-LLM, ensuring high-performance machine-learning model execution. It is packaged with a Docker-inspired command-line interface and a Typescript client library. - -The following guide details Cortex's core components, providing insights and instructions for those interested in customizing It to meet specific requirements. - - -## Architecture - -![Architecture](./_assets/architecture.png) - -### Main Components - -Cortex is architected with several key components, each designed to fulfill specific roles within the system, ensuring efficient processing and response to client requests. - -1. **Cortex JS**: This component acts as the interface layer where requests are received and responses are sent. -2. **Server:** The central processing unit of Cortex, this component coordinates all activities across the system. It manages the data flow and ensures operations are correctly executed. -3. **Kernel**: This component checks the server's hardware configuration. Based on the current hardware setup, it determines whether additional dependencies are required, optimizing the system for performance and compatibility. -4. **Runtime**: This process involves dynamically loading necessary libraries and models based on the server's current needs and processing requests. -5. **Dynamic Libraries**: Consists of inference engines loaded on-demand to enhance Cortex's processing power. These engines are essential for performing specialized computational tasks. Currently, Cortex supports: - - Llama.cpp Engine - - TensorRT-LLM Engine - - Python-runtime Engine - -### Data Structure - -Cortex is equipped with **MySQL** and **SQLite** databases, offering flexible data management options that can be easily adapted to different environments and requirements. It also has a filesystem data that can store and retrieve data using file-based mechanisms. - -#### MySQL - -This database is used because it is ideal for Cortex environments where scalability, security, and data integrity are critical. MySQL is well-suited for handling large model-size data from the core extensions. - -#### SQLite - -This database is used for simplicity and minimal setup. 
It can handle small model-size data from the core extensions and any data from the External extensions. - -#### File System - -Cortex uses a filesystem approach for managing configuration files, such as `model.yaml` files. These files are stored in a structured directory hierarchy, enabling efficient data retrieval and management. - -### Providers - -#### Internal Provider - -Integral to the CLI, the internal provider includes the core binary (**`.cpp`**) and is compiled directly with the CLI, giving all parts of the application direct access to core functionality. - -#### Core Extensions - -These are bundled with the CLI and include additional functionalities like remote engines and API models, facilitating more complex operations and interactions within the same architectural framework. - -#### External Extensions - -These are designed to be more flexible and are stored externally. They represent potential future expansions or integrations, allowing the architecture to extend its capabilities without modifying the core system. - -### Key Dependencies - -Cortex is developed using NestJS and operates via a Node.js server framework, handling all incoming and outgoing requests. It also has a C++ runtime to handle stateless requests. - -Below is a detailed overview of its core architecture components: - -#### NestJS Framework - -The NestJS framework serves as the backbone of Cortex. This framework facilitates the organization of server-side logic into modules, controllers, and extensions, which is important for maintaining a clean codebase and efficient request handling. - -#### Node.js Server - -Node.js is the primary runtime for Cortex; it handles the HTTP requests, executes the server-side logic, and manages the responses. - -#### C++ Runtime - -The C++ runtime is important for managing stateless requests. This component can handle intensive tasks that require optimized performance. - -## Code Structure - -The repository is organized to separate concerns between domain definitions, business rules, and adapters or implementations. -``` -# Entity Definitions -domain/ # This is the core directory where the domains are defined. - abstracts/ # Abstract base classes for common attributes and methods. - models/ # Domain interface definitions, e.g. model, assistant. - repositories/ # Extensions abstract and interface - -# Business Rules -usecases/ # Application logic - assistants/ # CRUD logic (invokes dtos, entities). - chat/ # Logic for chat functionalities. - models/ # Logic for model operations. - -# Adapters & Implementations -infrastructure/ # Implementations for Cortex interactions - commanders/ # CLI handlers - models/ - questions/ # CLI installation UX - shortcuts/ # CLI chained syntax - types/ - usecases/ # Invokes UseCases - - controllers/ # Nest controllers and HTTP routes - assistants/ # Invokes UseCases - chat/ # Invokes UseCases - models/ # Invokes UseCases - - database/ # Database providers (mysql, sqlite) - - # Framework specific object definitions - dtos/ # DTO definitions (data transfer & validation) - entities/ # TypeORM entity definitions (db schema) - - # Providers - providers/cortex # Cortex [server] provider (a core extension) - repositories/extensions # Extension provider (core & external extensions) - -extensions/ # External extensions -command.module.ts # CLI Commands List -main.ts # Entrypoint - -``` - -The structure above promotes clean architecture principles, allowing for scalable and maintainable Cortex development.
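To make the layering concrete, here is a minimal, hypothetical sketch of how a chat request could flow from a Nest controller into a use case. The names mirror the directories above (`infrastructure/controllers/chat`, `usecases/chat`) and the `createChatCompletions()` call in the runtime diagram below, but the code is illustrative only, not the actual Cortex source:

```typescript
import { Body, Controller, Injectable, Module, Post } from '@nestjs/common';

// Hypothetical DTO shape, mirroring OpenAI's chat completion request.
interface CreateChatCompletionDto {
  model: string;
  messages: { role: string; content: string }[];
}

// Business rules live in the use case layer (usecases/chat).
@Injectable()
class ChatUsecases {
  async createChatCompletions(dto: CreateChatCompletionDto) {
    // In Cortex this would resolve an inference engine via the
    // extension repository and stream tokens back; stubbed here.
    return { model: dto.model, choices: [] };
  }
}

// Controllers (infrastructure/controllers/chat) stay thin: they only
// accept the HTTP request and delegate to the use case.
@Controller('chat')
class ChatController {
  constructor(private readonly chat: ChatUsecases) {}

  @Post('completions')
  create(@Body() dto: CreateChatCompletionDto) {
    return this.chat.createChatCompletions(dto);
  }
}

@Module({ controllers: [ChatController], providers: [ChatUsecases] })
export class ChatModule {}
```

Because each layer depends only on the one beneath it, a different adapter (the CLI commanders, for example) can reuse the same use case without touching the controller.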
- - -## Runtime -```mermaid -sequenceDiagram - User-)Cortex: "Tell me a joke" - Cortex->>HF: Download a model - Cortex->>Model Controller/Service: Start the model - Cortex->>Chat Controller/Service: POST /completions - Chat Controller/Service ->> Chat UseCases: createChatCompletions() - Chat UseCases -->> Model Entity: findOne() - Cortex->>Model Entity: Store the model data - Chat UseCases -->> Extension Repository: findAll() - Extension Repository ->> Cortex Provider: inference() - CortexCPP Server ->> Cortex Provider: Port /??? - - %% Responses - Cortex Provider ->> Extension Repository: inference() - Extension Repository ->> Chat UseCases: Response stream - Chat UseCases ->> Chat Controller/Service: Formatted response/stream - Chat Controller/Service ->> User: "Your mama" -``` -The sequence diagram above outlines the interactions between various components in the Cortex system during runtime, particularly when handling user requests via a CLI. Here's a detailed breakdown of the runtime sequence: - -1. **User Request**: The user initiates an interaction by requesting "a joke" via the Cortex CLI. -2. **Model Activation**: - - The API directs the request to the `Model Controller/Service`. - - The service pulls and starts the appropriate model and posts a request to `'/completions'` to prepare the model for processing. -3. **Chat Processing**: - - The `Chat Controller/Service` processes the user's request using `Chat UseCases`. - - The `Chat UseCases` interact with the Model Entity and Extension Repository to gather necessary data and logic. -4. **Data Handling and Response Formation**: - - The `Model Entity` and `Extension Repository` perform data operations, which may involve calling a `Provider` for additional processing. - - Data is fetched, stored, and an inference is performed as needed. -5. **Response Delivery**: - - The response is formatted by the `Chat UseCases` and streamed back to the user through the API. - - The user receives the processed response, completing the cycle of interaction. - -## Roadmap - -Our development roadmap outlines key features and epics we will focus on in the upcoming releases. These enhancements aim to improve functionality, increase efficiency, and expand Cortex's capabilities. - -- **Crash Report Telemetry**: Enhance error reporting and operational stability by automatically collecting and analyzing crash reports. - -**RAG**: Improve response quality and contextual relevance in our AI models. -- **Cortex TensorRT-LLM**: Apply NVIDIA TensorRT-LLM optimizations for LLMs. -- **Cortex Presets**: Streamline model configurations. -- **Cortex Python Runtime**: Provide a scalable Python execution environment for Cortex. - -## Risks & Technical Debt - -Cortex CLI, built with Nest-commander, incorporates extensions to integrate various inference providers. This flexibility, however, introduces certain risks related to dependency management and the objective of bundling the CLI into a single executable binary. - -### Key Risks - -1. **Complex Dependencies**: Utilizing Nest-commander involves a deep dependency tree, risking version conflicts and complicating updates. -2. **Bundling Issues**: Converting to a single executable can reveal issues with `npm` dependencies and relative asset paths, leading to potential runtime errors due to unresolved assets or incompatible binary dependencies.
\ No newline at end of file diff --git a/docs/src/pages/cortex/build-extension.mdx b/docs/src/pages/cortex/build-extension.mdx deleted file mode 100644 index 6c110b7c6..000000000 --- a/docs/src/pages/cortex/build-extension.mdx +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Build an Extension -description: Build an Extension -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' \ No newline at end of file diff --git a/docs/src/pages/cortex/cli.mdx b/docs/src/pages/cortex/cli.mdx deleted file mode 100644 index 24a0dd229..000000000 --- a/docs/src/pages/cortex/cli.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: Command Line Interface -description: Cortex CLI. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. - - -# Cortex - -Cortex is a CLI tool used to interact with the Jan application and its various functions. - - -Cortex CLI is compatible with all OpenAI and OpenAI-compatible endpoints. - - -## Usage - -```bash -cortex [command] [flag] -``` -### Options -``` - -v, --version Cortex version (default: false) - -h, --help display help for command -``` -## Sub Commands -- [cortex models](/cortex/cli/models): Manage and configure models. -- [cortex serve](/cortex/cli/serve): Launch an API endpoint server for the Cortex backend. -- [cortex chat](/cortex/cli/chat): Send a chat request to a model. -- [cortex init|setup](/cortex/cli/init): Initialize settings and download dependencies for Cortex. -- [cortex ps](/cortex/cli/ps): Display active models and their operational status. -- [cortex kill](/cortex/cli/kill): Terminate active Cortex processes. -- [cortex pull|download](/cortex/cli/pull): Download a model. -- [cortex run](/cortex/cli/run): Shortcut to start a model and chat **(EXPERIMENTAL)**. \ No newline at end of file diff --git a/docs/src/pages/cortex/cli/_meta.json b/docs/src/pages/cortex/cli/_meta.json deleted file mode 100644 index dc2c73e69..000000000 --- a/docs/src/pages/cortex/cli/_meta.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "init": { - "title": "cortex init" - }, - "pull": { - "title": "cortex pull" - }, - "run": { - "title": "cortex run" - }, - "models": { - "title": "cortex models" - }, - "ps": { - "title": "cortex ps" - }, - "chat": { - "title": "cortex chat" - }, - "kill": { - "title": "cortex kill" - }, - "serve": { - "title": "cortex serve" - } -} diff --git a/docs/src/pages/cortex/cli/chat.mdx b/docs/src/pages/cortex/cli/chat.mdx deleted file mode 100644 index e4c086b65..000000000 --- a/docs/src/pages/cortex/cli/chat.mdx +++ /dev/null @@ -1,47 +0,0 @@ ---- -title: Cortex Chat -description: Cortex chat command. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. 
- - -# `cortex chat` - -This command starts a chat session with a specified model, allowing you to interact directly with it through an interactive chat interface. - - -This command is compatible with all OpenAI and OpenAI-compatible endpoints. - - -## Usage - -```bash -cortex chat --model MODEL_ID -``` -### Options -``` - -t, --thread Thread Id. If not provided, will create new thread - -m, --message Message to send to the model - -a, --attach Attach to interactive chat session (default: false) - -h, --help display help for command -``` \ No newline at end of file diff --git a/docs/src/pages/cortex/cli/init.mdx b/docs/src/pages/cortex/cli/init.mdx deleted file mode 100644 index 7a95016e8..000000000 --- a/docs/src/pages/cortex/cli/init.mdx +++ /dev/null @@ -1,49 +0,0 @@ ---- -title: Cortex Models Init -description: Cortex init command. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. - - -# `cortex init` - -This command initializes the cortex operations settings and downloads the required dependencies to run cortex. - - -This command is compatible with all OpenAI and OpenAI-compatible endpoints. - - -## Alias -The following alias is also available for initializing cortex: -- `cortex setup` - -## Usage - -```bash -cortex init -``` - -## Options -``` - -h, --help display help for command -``` \ No newline at end of file diff --git a/docs/src/pages/cortex/cli/kill.mdx b/docs/src/pages/cortex/cli/kill.mdx deleted file mode 100644 index eba4927dd..000000000 --- a/docs/src/pages/cortex/cli/kill.mdx +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Cortex Kill -description: Cortex kill command. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. - - -# `cortex kill` - -This command stops the currently running cortex processes. - - -This command is compatible with all OpenAI and OpenAI-compatible endpoints. - - -## Usage - -```bash -cortex kill -``` - -## Options -``` - -h, --help display help for command -``` \ No newline at end of file diff --git a/docs/src/pages/cortex/cli/models.mdx b/docs/src/pages/cortex/cli/models.mdx deleted file mode 100644 index 36ffa69a1..000000000 --- a/docs/src/pages/cortex/cli/models.mdx +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: Cortex Models -description: Cortex CLI. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. - - -# `cortex models` - -This command allows you to start, stop, and manage various model operations within Cortex. - - -This command is compatible with all OpenAI and OpenAI-compatible endpoints. 
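Because `cortex chat` fronts an OpenAI-compatible endpoint, the same completion can also be requested programmatically. Below is a minimal sketch using the official `openai` TypeScript SDK; the base URL, port, and model ID are assumptions for illustration, so adjust them to your local setup:

```typescript
import OpenAI from 'openai';

// Point the SDK at the local Cortex server instead of api.openai.com.
// The port and model ID below are assumptions, not documented defaults.
const client = new OpenAI({
  baseURL: 'http://localhost:1337/v1',
  apiKey: 'not-needed-for-a-local-server',
});

const completion = await client.chat.completions.create({
  model: 'llama3:7b',
  messages: [{ role: 'user', content: 'Tell me a joke' }],
});

console.log(completion.choices[0]?.message?.content);
```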
- - -## Usage - -```bash -cortex models API_COMMAND [OPTIONS] - -# Start a downloaded model -cortex models start MODEL_ID - -# Stop a downloaded model -cortex models stop MODEL_ID -``` - -## Options - -``` - -h, --help display help for command -``` diff --git a/docs/src/pages/cortex/cli/models/_meta.json b/docs/src/pages/cortex/cli/models/_meta.json deleted file mode 100644 index 592f89ac4..000000000 --- a/docs/src/pages/cortex/cli/models/_meta.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "download": { - "title": "cortex models pull" - }, - "list": { - "title": "cortex models list" - }, - "get": { - "title": "cortex models get" - }, - "update": { - "title": "cortex models update" - }, - "start": { - "title": "cortex models start" - }, - "stop": { - "title": "cortex models stop" - }, - "remove": { - "title": "cortex models remove" - } -} diff --git a/docs/src/pages/cortex/cli/models/download.mdx b/docs/src/pages/cortex/cli/models/download.mdx deleted file mode 100644 index 0adb63cb6..000000000 --- a/docs/src/pages/cortex/cli/models/download.mdx +++ /dev/null @@ -1,49 +0,0 @@ ---- -title: Cortex Models Pull -description: Cortex models subcommands. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. - - -# `cortex models pull` - -This command downloads a model. You can use a Hugging Face `MODEL_ID` to specify the model to download. - - -This command is compatible with all OpenAI and OpenAI-compatible endpoints. - - -## Usage - -```bash -cortex models pull MODEL_ID -``` -## Alias -The following alias is also available for downloading models: -- `cortex models download _` - -## Options -``` - -m, --model Model Id to start chat with - -h, --help display help for command -``` \ No newline at end of file diff --git a/docs/src/pages/cortex/cli/models/get.mdx b/docs/src/pages/cortex/cli/models/get.mdx deleted file mode 100644 index 3f7ce2be3..000000000 --- a/docs/src/pages/cortex/cli/models/get.mdx +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Cortex Models Get -description: Cortex models subcommands. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. - - -# `cortex models get` - -This command returns the details of a model defined by a `MODEL_ID`. - - -This command is compatible with all OpenAI and OpenAI-compatible endpoints. - - -## Usage - -```bash -cortex models get MODEL_ID -``` - -## Options -``` - -h, --help display help for command -``` \ No newline at end of file diff --git a/docs/src/pages/cortex/cli/models/list.mdx b/docs/src/pages/cortex/cli/models/list.mdx deleted file mode 100644 index 5ab1389f5..000000000 --- a/docs/src/pages/cortex/cli/models/list.mdx +++ /dev/null @@ -1,46 +0,0 @@ ---- -title: Cortex Models List -description: Cortex models subcommands.
-keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. - - -# `cortex models list` - -This command lists all local models. - - -This command is compatible with all OpenAI and OpenAI-compatible endpoints. - - -## Usage - -```bash -cortex models list -``` - -## Options -``` - -f, --format Print models list in table or json format (default: "json") - -h, --help display help for command -``` \ No newline at end of file diff --git a/docs/src/pages/cortex/cli/models/remove.mdx b/docs/src/pages/cortex/cli/models/remove.mdx deleted file mode 100644 index c972eacae..000000000 --- a/docs/src/pages/cortex/cli/models/remove.mdx +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Cortex Models Remove -description: Cortex models subcommands. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. - - -# `cortex models remove` - -This command deletes a local model defined by a `MODEL_ID`. - - -This command is compatible with all OpenAI and OpenAI-compatible endpoints. - - -## Usage - -```bash -cortex models remove MODEL_ID -``` - -## Options -``` - -h, --help display help for command -``` \ No newline at end of file diff --git a/docs/src/pages/cortex/cli/models/start.mdx b/docs/src/pages/cortex/cli/models/start.mdx deleted file mode 100644 index 355f20ba3..000000000 --- a/docs/src/pages/cortex/cli/models/start.mdx +++ /dev/null @@ -1,46 +0,0 @@ ---- -title: Cortex Models Start -description: Cortex models subcommands. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. - - -# `cortex models start` - -This command starts a model defined by a `MODEL_ID`. - - -This command is compatible with all OpenAI and OpenAI-compatible endpoints. - - -## Usage - -```bash -cortex models start MODEL_ID -``` - -## Options -``` - -a, --attach Attach to interactive chat session (default: false) - -h, --help display help for command -``` \ No newline at end of file diff --git a/docs/src/pages/cortex/cli/models/stop.mdx b/docs/src/pages/cortex/cli/models/stop.mdx deleted file mode 100644 index a7a95fa00..000000000 --- a/docs/src/pages/cortex/cli/models/stop.mdx +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Cortex Models Stop -description: Cortex models subcommands. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. 
- - -# `cortex models stop` - -This command stops a model defined by a `MODEL_ID`. - - -This command is compatible with all OpenAI and OpenAI-compatible endpoints. - - -## Usage - -```bash -cortex models stop MODEL_ID -``` - -## Options -``` - -h, --help display help for command -``` \ No newline at end of file diff --git a/docs/src/pages/cortex/cli/models/update.mdx b/docs/src/pages/cortex/cli/models/update.mdx deleted file mode 100644 index 129fa3634..000000000 --- a/docs/src/pages/cortex/cli/models/update.mdx +++ /dev/null @@ -1,48 +0,0 @@ ---- -title: Cortex Models Update -description: Cortex models subcommands. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. - - -# `cortex models update` - -This command updates a model configuration defined by a `MODEL_ID`. - - -This command is compatible with all OpenAI and OpenAI-compatible endpoints. - - -## Usage - -```bash -cortex models update MODEL_ID OPTIONS -``` - -## Options -``` - -m, --model Model Id to update - -c, --options Specify the options to update the model. Syntax: -c option1=value1 option2=value2. For - example: cortex models update -c max_tokens=100 temperature=0.5 - -h, --help display help for command -``` \ No newline at end of file diff --git a/docs/src/pages/cortex/cli/ps.mdx b/docs/src/pages/cortex/cli/ps.mdx deleted file mode 100644 index 4655c25c3..000000000 --- a/docs/src/pages/cortex/cli/ps.mdx +++ /dev/null @@ -1,48 +0,0 @@ ---- -title: Cortex Ps -description: Cortex ps command. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. - - -# `cortex ps` - -This command shows the running model and its status. - - -This command is compatible with all OpenAI and OpenAI-compatible endpoints. 
- - -## Usage - -```bash -cortex ps -``` -For example, it returns the following table: -```bash -┌─────────┬──────────────────────┬───────────────────┬───────────┬──────────┬─────┬──────┐ -│ (index) │ modelId              │ engine            │ status    │ duration │ ram │ vram │ -├─────────┼──────────────────────┼───────────────────┼───────────┼──────────┼─────┼──────┤ -│ 0       │ 'janhq/tinyllama/1b' │ 'cortex.llamacpp' │ 'running' │ '7s'     │ '-' │ '-'  │ -└─────────┴──────────────────────┴───────────────────┴───────────┴──────────┴─────┴──────┘ -``` \ No newline at end of file diff --git a/docs/src/pages/cortex/cli/pull.mdx b/docs/src/pages/cortex/cli/pull.mdx deleted file mode 100644 index 0c71bb666..000000000 --- a/docs/src/pages/cortex/cli/pull.mdx +++ /dev/null @@ -1,82 +0,0 @@ ---- -title: Cortex Pull -description: Cortex CLI. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. - - -# `cortex pull` - -This command facilitates downloading machine learning models from various model hubs, including the popular 🤗 [Hugging Face](https://huggingface.co/). - -By default, models are downloaded to the `node_modules` library path. For additional information on storage paths and options, refer [here](/cortex/cli#storage). - - -This command is compatible with all OpenAI and OpenAI-compatible endpoints. - - -## Alias -The following alias is also available for downloading models: -- `cortex download _` - -## Usage - -### Preconfigured Models - -Preconfigured models (with optimal runtime parameters and templates) are available from the [Jan Model Hub](https://huggingface.co/janhq) on Hugging Face. - -Models can be downloaded using a Docker-like interface with the following syntax: `repo_name:branch_name`. Each variant may include different quantizations and sizes, typically organized in the repository's branches. - -Available models include [llama3](https://huggingface.co/janhq/llama3), [mistral](https://huggingface.co/janhq/mistral), [tinyllama](https://huggingface.co/janhq/tinyllama), and [many more](https://huggingface.co/janhq). - - -New models will soon be added to Hugging Face's janhq repository. - - -```bash -# Pull a specific variant with `repo_name:branch` -cortex pull llama3:7b -``` -You can also download `size`, `format`, and `quantization` variants of each model. - -```bash -cortex pull llama3:8b-instruct-v3-gguf-Q4_K_M -cortex pull llama3:8b-instruct-v3-tensorrt-llm -``` - -Model variants are provided via the `branches` in each model's Hugging Face repo. - -### Hugging Face Models - -You can download any GGUF, TensorRT, or supported-format model directly from Hugging Face.
- -```bash -# cortex pull org_name/repo_name -cortex pull microsoft/Phi-3-mini-4k-instruct-gguf -``` - -## Options - -``` - -h, --help display help for command -``` diff --git a/docs/src/pages/cortex/cli/run.mdx b/docs/src/pages/cortex/cli/run.mdx deleted file mode 100644 index 59067322a..000000000 --- a/docs/src/pages/cortex/cli/run.mdx +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: Cortex Run -description: Cortex run command -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. - - -# `cortex run` - -This command starts an interactive chat shell with a specified machine learning model. - - -This command is compatible with all OpenAI and OpenAI-compatible endpoints. - - -## Usage - -```bash -cortex run MODEL_ID -``` -### Options -``` - -t, --thread Thread Id. If not provided, will create new thread - -h, --help display help for command -``` - -## Command Chain - -The `cortex run` command is a convenience wrapper that automatically executes a sequence of commands to simplify user interactions: - -1. [`cortex start`](/cortex/cli/models/start): This command starts the specified model, making it active and ready for interactions. -2. [`cortex chat`](/cortex/cli/chat): Following model activation, this command opens an interactive chat shell where users can directly communicate with the model. - diff --git a/docs/src/pages/cortex/cli/serve.mdx b/docs/src/pages/cortex/cli/serve.mdx deleted file mode 100644 index bc46683e9..000000000 --- a/docs/src/pages/cortex/cli/serve.mdx +++ /dev/null @@ -1,46 +0,0 @@ ---- -title: Cortex Serve -description: Cortex serve command. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. - - -# `cortex serve` - -This command runs the API endpoint server for the Cortex back-end. - - -This command is compatible with all OpenAI and OpenAI-compatible endpoints. - - -## Usage - -```bash -cortex serve -``` - -## Options -``` - -h, --host configure the host for the API endpoint server - -h, --help display help for command -``` \ No newline at end of file diff --git a/docs/src/pages/cortex/command-line.mdx b/docs/src/pages/cortex/command-line.mdx deleted file mode 100644 index 5b635cb90..000000000 --- a/docs/src/pages/cortex/command-line.mdx +++ /dev/null @@ -1,81 +0,0 @@ ---- -title: Command Line Interface -description: Cortex CLI. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. - - -# Command Line Interface - -The Cortex CLI provides a user-friendly platform for managing and operating large language models (LLMs), inspired by tools like Docker and GitHub CLI.
Designed for straightforward installation and use, it simplifies the integration and management of LLMs. - - -The Cortex CLI is OpenAI-compatible. - - -## Installation -To get started with the Cortex CLI, please see our guides: -- [Quickstart](/cortex/quickstart) -- [Device specific installation](/cortex/installation) - -These resources provide detailed instructions to ensure Cortex is set up correctly on your machine, accommodating various hardware environments. - -## Usage - -The Cortex CLI has a robust command set that streamlines your LLM interactions. - -Check out the [CLI reference pages](/cortex/cli) for a comprehensive guide on all available commands and their specific functions. - -## Storage - -By default, Cortex CLI stores model binaries, thread history, and other usage data in: -`$(npm list -g @janhq/cortex)`. - -You can find the respective folders within the `/lib/node_modules/@janhq/cortex/dist/` subdirectory. - - -**Ongoing Development**: -- Customizable Storage Locations -- Database Integration - - -## CLI Syntax - -The Cortex CLI improves the developer experience by incorporating command chaining and syntactic enhancements. -This approach automatically combines multiple operations into a single command, streamlining complex workflows. It simplifies the execution of extensive processes through integrated commands. - -### OpenAI API Equivalence - -The design of Cortex CLI commands strictly adheres to the method names used in the OpenAI API as a standard practice. This ensures a smooth transition for users familiar with OpenAI's system. - -For example: -- The `cortex chat` command is equivalent to the [`POST /v1/chat/completions` endpoint](/cortex/cortex-chat). - -- The `cortex models get ID` command is equivalent to the [`GET /models ${ID}` endpoint](/cortex/cortex-models). - -### Command Chaining - -Cortex CLI's command chaining support allows multiple commands to be executed in sequence with a simplified syntax. This approach reduces the complexity of command inputs and speeds up development tasks. - -For example: -- The [`cortex run`](/cortex/cortex-run) command, inspired by Docker and GitHub, starts the model and the inference engine, and provides a command-line chat interface for easy testing. diff --git a/docs/src/pages/cortex/cortex-cpp.mdx b/docs/src/pages/cortex/cortex-cpp.mdx deleted file mode 100644 index bd39939dc..000000000 --- a/docs/src/pages/cortex/cortex-cpp.mdx +++ /dev/null @@ -1,77 +0,0 @@ ---- -title: Cortex.cpp -description: Cortex.cpp Architecture -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. - -# Cortex.cpp - -Cortex.cpp is a stateless C++ server that is 100% compatible with the OpenAI API (stateless endpoints). - -It includes a Drogon server with request queues, model orchestration logic, hardware telemetry, and more, for production environments. - -This guide walks you through how Cortex.cpp is designed, its codebase structure, and future plans.
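As a rough sketch of what "stateless" means in practice: a client loads a model, then sends self-contained completion requests, with no session state kept between calls. The endpoint paths match the list under Code Structure below, but the host, port, and payload shapes here are assumptions for illustration:

```typescript
// Illustrative only: host, port, and payload shapes are assumptions.
const base = 'http://localhost:3928'; // hypothetical local port

// Load a model into memory first...
await fetch(`${base}/load_model`, {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ model: 'llama3:7b' }),
});

// ...then every completion request carries its full context (stateless).
const res = await fetch(`${base}/chat/completion`, {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ messages: [{ role: 'user', content: 'Hello' }] }),
});
console.log(await res.json());
```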
- -## Usage - -See [Quickstart](/cortex/quickstart) - -## Interface - -## Architecture - -## Code Structure - -```md -├── app/ -│ │ ├── controllers/ -│ │ ├── models/ -│ │ ├── services/ -│ │ ├── ?engines/ -│ │ │ ├── llama.cpp -│ │ │ ├── tensorrt-llm -│ │ │ └── ... -│ │ └── ... -│ ├── CMakeLists.txt -│ ├── config.json -│ ├── Dockerfile -│ ├── docker-compose.yml -│ ├── README.md -│ └── ... -``` - -The `cortex-cpp` folder contains stateless implementations, most of which call into `cortex.llamacpp` and `cortex.tensorrt-llm`, depending on the engine at runtime. - -Here you will find the implementations for stateless endpoints: -- `/chat/completion` -- `/audio` -- `/fine_tuning` -- `/embeddings` -- `/load_model` -- `/unload_model` - -It also contains core hardware and model management logic, such as CPU instruction set detection and multiple-model loading. - -## Runtime - -## Roadmap diff --git a/docs/src/pages/cortex/cortex-llamacpp.mdx b/docs/src/pages/cortex/cortex-llamacpp.mdx deleted file mode 100644 index b8e121a35..000000000 --- a/docs/src/pages/cortex/cortex-llamacpp.mdx +++ /dev/null @@ -1,143 +0,0 @@ ---- -title: Cortex.llamacpp -description: Cortex.llamacpp Architecture -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. - -# Cortex.llamacpp - -Cortex.llamacpp is a C++ inference library that can be loaded by any server at runtime. It submodules (and occasionally upstreams) [llama.cpp](https://github.com/ggerganov/llama.cpp) for GGUF inference. - -In addition to llama.cpp, cortex.llamacpp adds: -- OpenAI compatibility for the stateless endpoints -- Model orchestration like model warm-up and concurrent models - - -Cortex.llamacpp was formerly called "Nitro". - - -If you already use [Jan](/docs) or [Cortex](/cortex), cortex.llamacpp is bundled by default and you don't need this guide. This guide walks you through how to use cortex.llamacpp as a standalone library in any custom C++ server. - -## Usage - -To include cortex.llamacpp in your own server implementation, follow this [server example](https://github.com/menloresearch/cortex.llamacpp/tree/main/examples/server). - -## Interface - -Cortex.llamacpp has the following interfaces: - -- **HandleChatCompletion:** Processes chat completion tasks - - ```cpp - void HandleChatCompletion( - std::shared_ptr<Json::Value> jsonBody, - std::function<void(Json::Value&&, Json::Value&&)>&& callback); - ``` - -- **HandleEmbedding:** Generates embeddings for the input data provided - - ```cpp - void HandleEmbedding( - std::shared_ptr<Json::Value> jsonBody, - std::function<void(Json::Value&&, Json::Value&&)>&& callback); - ``` - -- **LoadModel:** Loads a model based on the specifications - - ```cpp - void LoadModel( - std::shared_ptr<Json::Value> jsonBody, - std::function<void(Json::Value&&, Json::Value&&)>&& callback); - ``` - -- **UnloadModel:** Unloads a model as specified - - ```cpp - void UnloadModel( - std::shared_ptr<Json::Value> jsonBody, - std::function<void(Json::Value&&, Json::Value&&)>&& callback); - ``` - -- **GetModelStatus:** Retrieves the status of a model - - ```cpp - void GetModelStatus( - std::shared_ptr<Json::Value> jsonBody, - std::function<void(Json::Value&&, Json::Value&&)>&& callback); - ``` - -**Parameters:** - -- **`jsonBody`**: The request content in JSON format.
-- **`callback`**: A function that handles the response. - -## Architecture - -The main components include: -- `enginei`: an engine interface definition that extends to all engines, handling endpoint logic and facilitating communication between `cortex.cpp` and `llama engine`. -- `llama engine`: exposes APIs for embedding and inference. It loads and unloads models and simplifies API calls to `llama.cpp`. -- `llama.cpp`: a submodule from the `llama.cpp` repository that provides the core functionality for embeddings and inference. -- `llama server context`: a wrapper that offers a simpler, more user-friendly interface for the `llama.cpp` APIs. - -![Cortex llamacpp architecture](./_assets/cortex-llamacpp-arch.png) - -### Communication Protocols - -- `Streaming`: Responses are processed and returned one token at a time. -- `RESTful`: The response is processed as a whole. After the llama server context completes the entire process, it returns a single result back to cortex.cpp. - -![Cortex llamacpp architecture](./_assets/cortex-llamacpp-act.png) - -## Code Structure - -``` -. -├── base # Engine interface definition -| └── cortex-common # Common interfaces used for all engines -| └── enginei.h # Define abstract classes and interface methods for engines -├── examples # Server example to integrate engine -│ └── server.cc # Example server demonstrating engine integration -├── llama.cpp # Upstream llama.cpp repository -│ └── (files from upstream llama.cpp) -├── src # Source implementation for llama.cpp -│ ├── chat_completion_request.h # OpenAI compatible request handling -│ ├── llama_client_slot # Manage vector of slots for parallel processing -│ ├── llama_engine # Implementation of llamacpp engine for model loading and inference -│ ├── llama_server_context # Context management for chat completion requests -│ │ ├── slot # Struct for slot management -│ │ └── llama_context # Struct for llama context management -| | └── chat_completion # Struct for chat completion management -| | └── embedding # Struct for embedding management -├── third-party # Dependencies of the cortex.llamacpp project -│ └── (list of third-party dependencies) -``` - -## Runtime - -## Roadmap -The future plans for Cortex.llamacpp are focused on enhancing performance and expanding capabilities. Key areas of improvement include: - -- Performance Enhancements: Optimizing speed and reducing memory usage to ensure efficient processing of tasks. -- Multimodal Model Compatibility: Expanding support to include a variety of multimodal models, enabling a broader range of applications and use cases.
- -To follow the latest developments, see the [cortex.llamacpp GitHub](https://github.com/menloresearch/cortex.llamacpp) \ No newline at end of file diff --git a/docs/src/pages/cortex/cortex-openvino.mdx b/docs/src/pages/cortex/cortex-openvino.mdx deleted file mode 100644 index 391902cf2..000000000 --- a/docs/src/pages/cortex/cortex-openvino.mdx +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: Cortex.OpenVino -description: Cortex.OpenVino Architecture -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - -# Cortex.OpenVino \ No newline at end of file diff --git a/docs/src/pages/cortex/cortex-python.mdx b/docs/src/pages/cortex/cortex-python.mdx deleted file mode 100644 index 2aef1e7ce..000000000 --- a/docs/src/pages/cortex/cortex-python.mdx +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: Cortex.python -description: Cortex.python Architecture -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - -# Cortex.python \ No newline at end of file diff --git a/docs/src/pages/cortex/cortex-tensorrt-llm.mdx b/docs/src/pages/cortex/cortex-tensorrt-llm.mdx deleted file mode 100644 index e582b1cc8..000000000 --- a/docs/src/pages/cortex/cortex-tensorrt-llm.mdx +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: Cortex.tensorrt-llm -description: Cortex.tensorrt-llm Architecture -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - -# Cortex.tensorrt-llm \ No newline at end of file diff --git a/docs/src/pages/cortex/embeddings.mdx b/docs/src/pages/cortex/embeddings.mdx deleted file mode 100644 index a84186710..000000000 --- a/docs/src/pages/cortex/embeddings.mdx +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Embeddings -description: Embeddings -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' \ No newline at end of file diff --git a/docs/src/pages/cortex/embeddings/overview.mdx b/docs/src/pages/cortex/embeddings/overview.mdx deleted file mode 100644 index e679cd763..000000000 --- a/docs/src/pages/cortex/embeddings/overview.mdx +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Overview -description: Overview. 
-keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' \ No newline at end of file diff --git a/docs/src/pages/cortex/error-codes.mdx b/docs/src/pages/cortex/error-codes.mdx deleted file mode 100644 index 878c4d66a..000000000 --- a/docs/src/pages/cortex/error-codes.mdx +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Error Codes -description: Error Codes. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' \ No newline at end of file diff --git a/docs/src/pages/cortex/ext-architecture.mdx b/docs/src/pages/cortex/ext-architecture.mdx deleted file mode 100644 index c2230c419..000000000 --- a/docs/src/pages/cortex/ext-architecture.mdx +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Extensions Architecture -description: Extensions Architecture -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' \ No newline at end of file diff --git a/docs/src/pages/cortex/fine-tuning.mdx b/docs/src/pages/cortex/fine-tuning.mdx deleted file mode 100644 index 7bf80bc11..000000000 --- a/docs/src/pages/cortex/fine-tuning.mdx +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Fine Tuning -description: Fine Tuning -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' \ No newline at end of file diff --git a/docs/src/pages/cortex/fine-tuning/overview.mdx b/docs/src/pages/cortex/fine-tuning/overview.mdx deleted file mode 100644 index e679cd763..000000000 --- a/docs/src/pages/cortex/fine-tuning/overview.mdx +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Overview -description: Overview. 
-keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' \ No newline at end of file diff --git a/docs/src/pages/cortex/function-calling.mdx b/docs/src/pages/cortex/function-calling.mdx deleted file mode 100644 index eca57c982..000000000 --- a/docs/src/pages/cortex/function-calling.mdx +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Function Calling -description: Function Calling -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' \ No newline at end of file diff --git a/docs/src/pages/cortex/hardware.mdx b/docs/src/pages/cortex/hardware.mdx deleted file mode 100644 index fdb16a4e8..000000000 --- a/docs/src/pages/cortex/hardware.mdx +++ /dev/null @@ -1,50 +0,0 @@ ---- -title: Hardware Requirements -description: Get started quickly with Jan, a ChatGPT-alternative that runs on your own computer, with a local API server. Learn how to install Jan and select an AI model to start chatting. -sidebar_position: 2 -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - quickstart, - getting started, - using AI model, - installation, - ] ---- - -import { Tabs } from 'nextra/components' -import { Callout, Steps } from 'nextra/components' - -# Hardware Requirements - -To run LLMs on-device, Cortex has the following hardware requirements: - -These are the general hardware requirements for running Cortex on your system. Please refer to the respective [installation](/cortex/installation) sections for detailed specifications tailored to each environment. - - - -## OS -- MacOSX 13.6 or higher. -- Windows 10 or higher. -- Ubuntu 12.04 and later. - -## RAM (CPU Mode) -- 8GB for running up to 3B models. -- 16GB for running up to 7B models. -- 32GB for running up to 13B models. - -## VRAM (GPU Mode) -- 6GB can load the 3B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU. -- 8GB can load the 7B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU. -- 12GB can load the 13B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU. - -## Disk Space -- 10GB: The app is 1.02 MB, but models are usually 4GB+. \ No newline at end of file diff --git a/docs/src/pages/cortex/index.mdx b/docs/src/pages/cortex/index.mdx deleted file mode 100644 index 887eee246..000000000 --- a/docs/src/pages/cortex/index.mdx +++ /dev/null @@ -1,50 +0,0 @@ ---- -title: Cortex -description: Cortex is a local LLM engine for developers -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Discord integration, - Discord, - bot, - ] ---- - -import { Callout, Steps } from 'nextra/components' - -# Cortex - -🚧 Cortex is under construction.
- - -![Cortex Cover Image](./_assets/cortex-cover.png) - -Cortex is an [OpenAI-compatible](https://platform.openai.com/docs/introduction) local AI server that developers can use to build LLM apps. It can be used as a standalone server or imported as a library. - -Cortex currently supports two inference engines: -- Llama.cpp -- TensorRT-LLM - - - **Real-world Use**: Cortex powers [Jan](/docs), our local ChatGPT-alternative. - - Cortex has been battle-tested through 900k downloads, and handles a variety of hardware and software edge cases. - - -### Roadmap - -Cortex's roadmap is to implement an [OpenAI-equivalent API](https://platform.openai.com/docs/api-reference) using a fully open source stack. Our goal is to make switching to open source AI as easy as possible for developers. - -### Architecture - -Cortex's [architecture](/cortex/architecture) features a C++ inference core, with [higher-order features](/cortex/architecture) handled in TypeScript. - -Our [long-term direction](/cortex/roadmap) is to (eventually) move towards being a full C++ library to enable embedded and robotics use cases. \ No newline at end of file diff --git a/docs/src/pages/cortex/installation.mdx b/docs/src/pages/cortex/installation.mdx deleted file mode 100644 index 2c32bac3f..000000000 --- a/docs/src/pages/cortex/installation.mdx +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Desktop Installation -description: Cortex Desktop Installation. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' -import childPages from './installation/_meta.json'; - -# Cortex Desktop Installation -
- - ( - - ))} -/> \ No newline at end of file diff --git a/docs/src/pages/cortex/installation/_meta.json b/docs/src/pages/cortex/installation/_meta.json deleted file mode 100644 index 4929f731a..000000000 --- a/docs/src/pages/cortex/installation/_meta.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "mac": { - "title": "Mac", - "href": "/cortex/installation/mac" - }, - "windows": { - "title": "Windows", - "href": "/cortex/installation/windows" - }, - "linux": { - "title": "Linux", - "href": "/cortex/installation/linux" - } -} diff --git a/docs/src/pages/cortex/installation/linux.mdx b/docs/src/pages/cortex/installation/linux.mdx deleted file mode 100644 index 90f396811..000000000 --- a/docs/src/pages/cortex/installation/linux.mdx +++ /dev/null @@ -1,181 +0,0 @@ ---- -title: Linux -description: Install Cortex CLI on Linux. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - quickstart, - getting started, - using AI model, - installation, - "desktop" - ] ---- - -import { Tabs, Steps } from 'nextra/components' -import { Callout } from 'nextra/components' -import FAQBox from '@/components/FaqBox' - - -🚧 Cortex is under construction. - -# Linux Installation -## Prerequisites - -### Dependencies - -Before installation, ensure that you have installed the following: - -- **Node.js**: Required for running the installation. -- **NPM**: Needed to manage packages. - - -The **CPU instruction sets** are not required for the initial installation of Cortex. This dependency will be automatically installed during the Cortex initialization if they are not already on your system. - - - -### Hardware - -Ensure that your system meets the following requirements to run Cortex: - - -- Debian-based (Supports `.deb` and `AppImage` ) - - Ubuntu-based - - Ubuntu Desktop LTS (official)/ Ubuntu Server LTS (only for server) - - Edubuntu (Mainly desktop) - - Kubuntu (Desktop only) - - Lubuntu (Both desktop and server, though mainly desktop) - - Ubuntu Budgie (Mainly desktop) - - Ubuntu Cinnamon (Desktop only) - - Ubuntu Kylin (Both desktop and server) - - Ubuntu MATE (Desktop only) -- Pacman-based - - Arch Linux based - - Arch Linux (Mainly desktop) - - SteamOS (Desktop only) -- RPM-based (Supports `.rpm` and `AppImage` ) -- Fedora-based - - RHEL-based (Server only) -- openSUSE (Both desktop and server) - - - - Please check whether your Linux distribution supports desktop, server, or both environments. - - - - - - - -- Jan supports a processor that can handle AVX2. For the full list, please see [here](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2). -- We support older processors with AVX and AVX-512, though this is not recommended. - -- Haswell processors (Q2 2013) and newer. -- Tiger Lake (Q3 2020) and newer for Celeron and Pentium processors. - - - -- Jan supports a processor that can handle AVX2. For the full list, please see [here](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2). -- We support older processors with AVX and AVX-512, though this is not recommended. - -- Excavator processors (Q2 2015) and newer. - - - - -- 8GB for running up to 3B models (int4). -- 16GB for running up to 7B models (int4). -- 32GB for running up to 13B models (int4). - - -We support DDR2 RAM as the minimum requirement but recommend using newer generations of RAM for improved performance. 
- - - - -- 6GB can load the 3B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU. -- 8GB can load the 7B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU. -- 12GB can load the 13B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU. - - -Having at least 6GB VRAM when using NVIDIA, AMD, or Intel Arc GPUs is recommended. - - - - -- At least 10GB for app storage and model download. - - - -## Cortex Installation - -To install Cortex, follow the steps below: - - -### Step 1: Install Cortex - -Run the following command to install Cortex globally on your machine: - - -Install NPM on your machine before proceeding with this step. - - - -```sh -# Install globally on your system -npm i -g @janhq/cortex -``` - -Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance. - - - -### Step 2: Verify the Installation - -1. After installation, you can verify that Cortex is installed correctly by getting help information. - -```sh -# Get the help information -cortex -h -``` -2. Cortex is ready to use! - -## Build from Source - -To install Cortex from the source, follow the steps below: - -1. Clone the Cortex repository [here](https://github.com/menloresearch/cortex/tree/dev). -2. Navigate to the `cortex-js` folder. -3. Open the terminal and run the following command to build the Cortex project: - -```sh -npx nest build -``` - -4. Make the `command.js` executable: - -```sh -chmod +x '[path-to]/cortex/cortex-js/dist/src/command.js' -``` - -5. Link the package globally: - -```sh -npm link -``` -6. Initialize Cortex by following the steps [here](#step-3-initialize-cortex). -## Uninstall Cortex - -Run the following command to uninstall Cortex globally on your machine: -```sh -# Uninstall globally on your system -npm uninstall -g @janhq/cortex -``` \ No newline at end of file diff --git a/docs/src/pages/cortex/installation/mac.mdx b/docs/src/pages/cortex/installation/mac.mdx deleted file mode 100644 index e3afaaf5d..000000000 --- a/docs/src/pages/cortex/installation/mac.mdx +++ /dev/null @@ -1,147 +0,0 @@ ---- -title: Mac -description: Install Cortex CLI on Mac. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - quickstart, - getting started, - using AI model, - installation, - "desktop" - ] ---- - -import { Tabs, Steps } from 'nextra/components' -import { Callout } from 'nextra/components' -import FAQBox from '@/components/FaqBox' - - - -🚧 Cortex is under construction. - -# Mac Installation -## Prerequisites - -### Dependencies - -Before installation, ensure that you have installed the following: - -- **Node.js**: Required for running the installation. -- **NPM**: Needed to manage packages. - - -The **CPU instruction sets** are not required for the initial installation of Cortex. This dependency will be automatically installed during the Cortex initialization if they are not already on your system. - - - -### Hardware - -Ensure that your system meets the following requirements to run Cortex: - - - - -- MacOSX 13.6 or higher. - - -- 8GB for running up to 3B models. -- 16GB for running up to 7B models. -- 32GB for running up to 13B models. - - -- At least 10GB for app and model download. - - - - - - -- MacOSX 13.6 or higher. - - -- 8GB for running up to 3B models. -- 16GB for running up to 7B models. -- 32GB for running up to 13B models. 
- -Apple Silicon Macs leverage Metal for GPU acceleration, providing faster performance than Intel Macs, which rely solely on CPU processing. - - - - -- At least 10GB for app and model download. - - - - -## Cortex Installation - -To install Cortex, follow the steps below: - -### Step 1: Install Cortex - -Run the following command to install Cortex globally on your machine: - - -Install NPM on your machine before proceeding with this step. - - - -```sh -# Install globally on your system -npm i -g @janhq/cortex -``` - -Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance. - - - -### Step 2: Verify the Installation - -1. After installation, you can verify that Cortex is installed correctly by getting help information. - -```sh -# Get the help information -cortex -h -``` -2. Cortex is ready to use! - -## Build from Source - -To install Cortex from the source, follow the steps below: - -1. Clone the Cortex repository [here](https://github.com/menloresearch/cortex/tree/dev). -2. Navigate to the `cortex-js` folder. -3. Open the terminal and run the following command to build the Cortex project: - -```sh -npx nest build -``` - -4. Make the `command.js` executable: - -```sh -chmod +x '[path-to]/cortex/cortex-js/dist/src/command.js' -``` - -5. Link the package globally: - -```sh -npm link -``` -6. Initialize Cortex by following the steps [here](#step-3-initialize-cortex). -## Uninstall Cortex - -Run the following command to uninstall Cortex globally on your machine: -```sh -# Uninstall globally using NPM -npm uninstall -g @janhq/cortex -``` \ No newline at end of file diff --git a/docs/src/pages/cortex/installation/windows.mdx b/docs/src/pages/cortex/installation/windows.mdx deleted file mode 100644 index 326288bfd..000000000 --- a/docs/src/pages/cortex/installation/windows.mdx +++ /dev/null @@ -1,198 +0,0 @@ ---- -title: Windows -description: Install Cortex CLI on Windows. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - quickstart, - getting started, - using AI model, - installation, - "desktop" - ] ---- - -import { Tabs, Steps } from 'nextra/components' -import { Callout } from 'nextra/components' -import FAQBox from '@/components/FaqBox' - - - -🚧 Cortex is under construction. - -# Windows Installation - -## Prerequisites - -### Dependencies - -Before installation, ensure that you have installed the following: - -- **Node.js**: Required for running the installation. -- **NPM**: Needed to manage packages. -- **Windows Subsystem Linux (Ubuntu)**: Required to install for WSL2 installation. - - -The **CPU instruction sets** are not required for the initial installation of Cortex. This dependency will be automatically installed during the Cortex initialization if they are not already on your system. - - - -### Hardware - -Ensure that your system meets the following requirements to run Cortex: - - -- Windows 10 or higher. - - - - - -- Jan supports a processor that can handle AVX2. For the full list, please see [here](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2). -- We support older processors with AVX and AVX-512, though this is not recommended. - -- Haswell processors (Q2 2013) and newer. -- Tiger Lake (Q3 2020) and newer for Celeron and Pentium processors. - - - -- Jan supports a processor that can handle AVX2. 
For the full list, please see [here](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2). -- We support older processors with AVX and AVX-512, though this is not recommended. - -- Excavator processors (Q2 2015) and newer. - - - - -- 8GB for running up to 3B models (int4). -- 16GB for running up to 7B models (int4). -- 32GB for running up to 13B models (int4). - - -We support DDR2 RAM as the minimum requirement but recommend using newer generations of RAM for improved performance. - - - - -- 6GB can load the 3B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU. -- 8GB can load the 7B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU. -- 12GB can load the 13B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU. - - -Having at least 6GB VRAM when using NVIDIA, AMD, or Intel Arc GPUs is recommended. - - - - -- At least 10GB for app storage and model download. - - - -## Cortex Installation - -To install Cortex, follow the steps below: - - -### Step 1: Install Cortex - -Run the following command to install Cortex globally on your machine: - - -Install NPM on your machine before proceeding with this step. - - - -```sh -# Install globally on your system -npm i -g @janhq/cortex -``` - -Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance. - - - -### Step 2: Verify the Installation - -1. After installation, you can verify that Cortex is installed correctly by getting help information. - -```sh -# Get the help information -cortex -h -``` -2. Cortex is ready to use! - -## Windows Subsystem Linux - -To install Cortex using the NPM package in WSL2, follow the steps below: - -### Step 1: Open your WSL2 Terminal - -Open your Linux terminal in WSL2. For WSL2, you can use the Linux distribution terminal, which is Ubuntu. - -### Step 2: Install Cortex - -Run the following command to install Cortex globally on your machine: - - -Install NPM on your machine before proceeding with this step. - - - -```sh -# Install globally on your system -npm i -g @janhq/cortex -``` - -Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance. - - - -### Step 3: Verify the Installation - -After installation, you can verify that Cortex is installed correctly by getting help information. - -```sh -# Get the help information -cortex -h -``` - -## Build from Source - -To install Cortex from the source, follow the steps below: - -1. Clone the Cortex repository [here](https://github.com/menloresearch/cortex/tree/dev). -2. Navigate to the `cortex-js` folder. -3. Open the terminal and run the following command to build the Cortex project: - -```sh -npx nest build -``` - -4. Make the `command.js` executable: - -```sh -node "[path-to]\cortex\cortex-js\dist\src\command.js" -``` - -5. Link the package globally: - -```sh -npm link -``` -6. Initialize Cortex by following the steps [here](#step-3-initialize-cortex). 
- -## Uninstall Cortex - -Run the following command to uninstall Cortex globally on your machine: -```sh -# Uninstall globally on your system -npm uninstall -g @janhq/cortex -``` \ No newline at end of file diff --git a/docs/src/pages/cortex/model-operations.mdx b/docs/src/pages/cortex/model-operations.mdx deleted file mode 100644 index 5731fe34c..000000000 --- a/docs/src/pages/cortex/model-operations.mdx +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Model Operations -description: Model Operations -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' \ No newline at end of file diff --git a/docs/src/pages/cortex/model-operations/overview.mdx b/docs/src/pages/cortex/model-operations/overview.mdx deleted file mode 100644 index e679cd763..000000000 --- a/docs/src/pages/cortex/model-operations/overview.mdx +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Overview -description: Overview. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' \ No newline at end of file diff --git a/docs/src/pages/cortex/py-library.mdx b/docs/src/pages/cortex/py-library.mdx deleted file mode 100644 index 337bd5ad5..000000000 --- a/docs/src/pages/cortex/py-library.mdx +++ /dev/null @@ -1,69 +0,0 @@ ---- -title: Python Library -description: Cortex Python Library. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. - - -# Python Library -Cortex also provides a Python client library that is a **direct substitute for OpenAI's** [Python library](https://github.com/openai/openai-python), enabling easy integration and streamlined workflows. - -## Installation -Use the following pip command to install the Cortex library in your project: -```py -pip install @janhq/cortex-python -``` -## Usage - -Switching to the Cortex Client Library from the OpenAI Python Library involves simple updates. -1. Replace the OpenAI import with Cortex in your application: -```diff -- from openai import OpenAI -+ from @janhq/cortex-python import Cortex -``` -2. 
Modify the initialization of the client to use Cortex: -```diff -- client = OpenAI(api_key='your-api-key') -+ client = Cortex(base_url="BASE_URL", api_key="API_KEY") # This can be omitted if using the default - -``` -### Example Usage -```py -from @janhq/cortex-python import Cortex - -client = OpenAI(base_url="http://localhost:1337", api_key="cortex") - -model = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" -client.models.start(model=model) - -completion = client.chat.completions.create( - model=model, - messages=[ - { - "role": "user", - "content": "Say this is a test", - }, - ], -) -print(completion.choices[0].message.content) -``` \ No newline at end of file diff --git a/docs/src/pages/cortex/quickstart.mdx b/docs/src/pages/cortex/quickstart.mdx deleted file mode 100644 index 4b520a20c..000000000 --- a/docs/src/pages/cortex/quickstart.mdx +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: Quickstart -description: Cortex Quickstart. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - -# Quickstart - - -🚧 Cortex is under construction. - - -To get started, confirm that your system meets the [hardware requirements](/cortex/hardware), and follow the steps below: - -```bash -# 1. Install Cortex using NPM -npm i -g @janhq/cortex - -# 2. Download a GGUF model -cortex models pull llama3 - -# 3. Run the model to start chatting -cortex models run llama3 - -# 4. (Optional) Run Cortex in OpenAI-compatible server mode -cortex serve -``` - -For more details regarding the Cortex server mode, please see here: -- [Server Endpoint](/cortex/server) -- [`cortex serve` command](/cortex/cli/serve) - - -## What's Next? -With Cortex now fully operational, you're ready to delve deeper: -- Explore how to [install Cortex](/cortex/installation) across various hardware environments. -- Familiarize yourself with the comprehensive set of [Cortex CLI commands](/cortex/cli) available for use. -- Gain insights into the system’s design by examining the [architecture](/cortex/architecture) of Cortex. \ No newline at end of file diff --git a/docs/src/pages/cortex/rag.mdx b/docs/src/pages/cortex/rag.mdx deleted file mode 100644 index 89ab5aecd..000000000 --- a/docs/src/pages/cortex/rag.mdx +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: RAG -description: RAG -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' \ No newline at end of file diff --git a/docs/src/pages/cortex/rag/overview.mdx b/docs/src/pages/cortex/rag/overview.mdx deleted file mode 100644 index e679cd763..000000000 --- a/docs/src/pages/cortex/rag/overview.mdx +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Overview -description: Overview. 
-keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' \ No newline at end of file diff --git a/docs/src/pages/cortex/server.mdx b/docs/src/pages/cortex/server.mdx deleted file mode 100644 index b12661234..000000000 --- a/docs/src/pages/cortex/server.mdx +++ /dev/null @@ -1,47 +0,0 @@ ---- -title: Command Line Interface -description: Cortex CLI. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps, Cards, Card } from 'nextra/components' -import OAICoverage from "@/components/OAICoverage" - - -🚧 Cortex is under construction. - - -# Server Endpoint - -Cortex can run in headless server mode, providing an [OpenAI-API compatible](https://platform.openai.com/docs/api-reference/introduction) endpoint. - -## Usage - -``` -cortex serve -``` - -A full, local AI server will be started on port `7331` (customizable). - -## Playground - -You can open up an interactive playground at: http://localhost:1337/api, generated from Swagger. - - -## OpenAI Coverage - - \ No newline at end of file diff --git a/docs/src/pages/cortex/text-generation.mdx b/docs/src/pages/cortex/text-generation.mdx deleted file mode 100644 index 9e903bc87..000000000 --- a/docs/src/pages/cortex/text-generation.mdx +++ /dev/null @@ -1,86 +0,0 @@ ---- -title: Overview -description: Overview. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' -import { Tabs } from 'nextra/components' - - -🚧 Cortex is under construction. - -# Text Generation - -Cortex's Chat API is compatible with OpenAI’s [Chat Completions](https://platform.openai.com/docs/api-reference/chat) endpoint. It is a drop-in replacement for local inference. - -For local inference, Cortex is [multi-engine](#multiple-local-engines) and supports the following model formats: - -- `GGUF`: A generalizable LLM format that runs across CPUs and GPUs. Cortex implements a GGUF runtime through [llama.cpp](https://github.com/ggerganov/llama.cpp/). -- `TensorRT`: A a production-ready, enterprise-grade LLM format optimized for fast inference on NVIDIA GPUs. Cortex implements a TensorRT runtime through [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). - -For remote inference, Cortex routes requests to multiple APIs, while providing a single, easy to use, OpenAI compatible endpoint. [Read more](#remote-api-integration). - -## Usage - - - - -```bash -# Streaming -cortex chat --model janhq/TinyLlama-1.1B-Chat-v1.0-GGUF -``` - - - -**Read more:** - -- Chat Completion Object -- Chat Completions API -- Chat Completions CLI - -## Capabilities - -### Multiple Local Engines - -Cortex scales applications from prototype to production. It runs on CPU-only laptops with Llama.cpp and GPU-accelerated clusters with TensorRT-LLM. 
- -To learn more about how to configure each engine: - -- Use llama.cpp -- Use tensorrt-llm - -To learn more about our engine architecture: - -- cortex.cpp -- cortex.llamacpp -- cortex.tensorRTLLM - -### Multiple Remote APIs - -Cortex also works as an aggregator to make remote inference requests from a single endpoint. - -Currently, Cortex supports: -- OpenAI -- Groq -- Cohere -- Anthropic -- MistralAI -- Martian -- OpenRouter - diff --git a/docs/src/pages/cortex/ts-library.mdx b/docs/src/pages/cortex/ts-library.mdx deleted file mode 100644 index 1344050b5..000000000 --- a/docs/src/pages/cortex/ts-library.mdx +++ /dev/null @@ -1,66 +0,0 @@ ---- -title: Typescript Library -description: Cortex Node Client Library -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - -🚧 Cortex is under construction. - - -# Typescript Library -Cortex provides a robust Typescript client library designed as a **direct substitute for OpenAI's** [Node.js/Typescript library](https://github.com/openai/openai-node), enabling easy integration and streamlined workflows. - -## Installation -Install the package via npm with the following command in your project: -```js -npm install @janhq/cortex-node -``` - -## Usage - -Transitioning to the Cortex Client Library from the OpenAI Client Library involves minimal changes, mostly updating the import statement. -1. Replace the OpenAI import with Cortex in your application: -```diff -- import OpenAI from 'openai'; -+ import { Cortex } from '@janhq/cortex-node'; -``` -2. Modify the initialization of the client to use Cortex: -```diff -- const openai = new OpenAI({ -+ const cortex = new Cortex({ - baseURL: ['BASE_URL'], // The default base URL for Cortex is 'http://localhost:1337' - apiKey: process.env['OPENAI_API_KEY'], // This can be omitted if using the default -}); - -``` -### Example Usage -```js -import { Cortex } from '@janhq/cortex-node'; - -const cortex = new Cortex({ - baseURL: ['http://localhost:1337'], - apiKey: process.env['cortex'], -}); - -cortex.models.start('llama3:7b') -cortex.models.stop('llama3:7b') -cortex.threads.list() -``` \ No newline at end of file diff --git a/docs/src/pages/cortex/vision.mdx b/docs/src/pages/cortex/vision.mdx deleted file mode 100644 index 8701ffbd1..000000000 --- a/docs/src/pages/cortex/vision.mdx +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Vision -description: Vision -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' \ No newline at end of file diff --git a/docs/src/pages/cortex/vision/overview.mdx b/docs/src/pages/cortex/vision/overview.mdx deleted file mode 100644 index e679cd763..000000000 --- a/docs/src/pages/cortex/vision/overview.mdx +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Overview -description: Overview. 
-keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Cortex, - Jan, - LLMs - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' \ No newline at end of file diff --git a/docs/src/pages/docs/_assets/add_assistant.png b/docs/src/pages/docs/_assets/add_assistant.png new file mode 100644 index 000000000..b8f3defb5 Binary files /dev/null and b/docs/src/pages/docs/_assets/add_assistant.png differ diff --git a/docs/src/pages/docs/_assets/api-server.png b/docs/src/pages/docs/_assets/api-server.png index 2d62edbe5..e25d0a5bb 100644 Binary files a/docs/src/pages/docs/_assets/api-server.png and b/docs/src/pages/docs/_assets/api-server.png differ diff --git a/docs/src/pages/docs/_assets/cohere.png b/docs/src/pages/docs/_assets/cohere.png index 0d8f7f010..f7d24d74c 100644 Binary files a/docs/src/pages/docs/_assets/cohere.png and b/docs/src/pages/docs/_assets/cohere.png differ diff --git a/docs/src/pages/docs/_assets/google.png b/docs/src/pages/docs/_assets/google.png index 8a99146b7..751c473d7 100644 Binary files a/docs/src/pages/docs/_assets/google.png and b/docs/src/pages/docs/_assets/google.png differ diff --git a/docs/src/pages/docs/_assets/gpu_accl.png b/docs/src/pages/docs/_assets/gpu_accl.png new file mode 100644 index 000000000..d1cfca99f Binary files /dev/null and b/docs/src/pages/docs/_assets/gpu_accl.png differ diff --git a/docs/src/pages/docs/_assets/groq.png b/docs/src/pages/docs/_assets/groq.png index cef77e7de..c2f87ecdd 100644 Binary files a/docs/src/pages/docs/_assets/groq.png and b/docs/src/pages/docs/_assets/groq.png differ diff --git a/docs/src/pages/docs/_assets/hf-unsloth.png b/docs/src/pages/docs/_assets/hf-unsloth.png new file mode 100644 index 000000000..2e3edb422 Binary files /dev/null and b/docs/src/pages/docs/_assets/hf-unsloth.png differ diff --git a/docs/src/pages/docs/_assets/hf_and_jan.png b/docs/src/pages/docs/_assets/hf_and_jan.png new file mode 100644 index 000000000..5bc324a29 Binary files /dev/null and b/docs/src/pages/docs/_assets/hf_and_jan.png differ diff --git a/docs/src/pages/docs/_assets/hf_token.png b/docs/src/pages/docs/_assets/hf_token.png new file mode 100644 index 000000000..49b16b25a Binary files /dev/null and b/docs/src/pages/docs/_assets/hf_token.png differ diff --git a/docs/src/pages/docs/_assets/jan_ui.png b/docs/src/pages/docs/_assets/jan_ui.png new file mode 100644 index 000000000..01d3568df Binary files /dev/null and b/docs/src/pages/docs/_assets/jan_ui.png differ diff --git a/docs/src/pages/docs/_assets/llama.cpp-01.png b/docs/src/pages/docs/_assets/llama.cpp-01.png index 2c8373043..95b4f550e 100644 Binary files a/docs/src/pages/docs/_assets/llama.cpp-01.png and b/docs/src/pages/docs/_assets/llama.cpp-01.png differ diff --git a/docs/src/pages/docs/_assets/mistralai.png b/docs/src/pages/docs/_assets/mistralai.png index 98c550680..98fb13eb5 100644 Binary files a/docs/src/pages/docs/_assets/mistralai.png and b/docs/src/pages/docs/_assets/mistralai.png differ diff --git a/docs/src/pages/docs/_assets/model-import-04.png b/docs/src/pages/docs/_assets/model-import-04.png new file mode 100644 index 000000000..5b35c816d Binary files /dev/null and b/docs/src/pages/docs/_assets/model-import-04.png differ diff --git a/docs/src/pages/docs/_assets/model-import-05.png b/docs/src/pages/docs/_assets/model-import-05.png new file mode 100644 index 
000000000..9d54e1559 Binary files /dev/null and b/docs/src/pages/docs/_assets/model-import-05.png differ diff --git a/docs/src/pages/docs/_assets/model-management-02.png b/docs/src/pages/docs/_assets/model-management-02.png index 3fc2e7b37..94e933c6f 100644 Binary files a/docs/src/pages/docs/_assets/model-management-02.png and b/docs/src/pages/docs/_assets/model-management-02.png differ diff --git a/docs/src/pages/docs/_assets/model-management-04.png b/docs/src/pages/docs/_assets/model-management-04.png index 30117ff1b..be20e984d 100644 Binary files a/docs/src/pages/docs/_assets/model-management-04.png and b/docs/src/pages/docs/_assets/model-management-04.png differ diff --git a/docs/src/pages/docs/_assets/model-management-05.png b/docs/src/pages/docs/_assets/model-management-05.png index 85676ccd2..4c817aecc 100644 Binary files a/docs/src/pages/docs/_assets/model-management-05.png and b/docs/src/pages/docs/_assets/model-management-05.png differ diff --git a/docs/src/pages/docs/_assets/model-management-06.png b/docs/src/pages/docs/_assets/model-management-06.png index eced32503..e0e33cbcb 100644 Binary files a/docs/src/pages/docs/_assets/model-management-06.png and b/docs/src/pages/docs/_assets/model-management-06.png differ diff --git a/docs/src/pages/docs/_assets/model-parameters.png b/docs/src/pages/docs/_assets/model-parameters.png index 2d4c12ec2..777b013d5 100644 Binary files a/docs/src/pages/docs/_assets/model-parameters.png and b/docs/src/pages/docs/_assets/model-parameters.png differ diff --git a/docs/src/pages/docs/_assets/openai.png b/docs/src/pages/docs/_assets/openai.png index 6c489c4dd..eb3160982 100644 Binary files a/docs/src/pages/docs/_assets/openai.png and b/docs/src/pages/docs/_assets/openai.png differ diff --git a/docs/src/pages/docs/_assets/openrouter.png b/docs/src/pages/docs/_assets/openrouter.png index 3e4c6b472..3cb114c92 100644 Binary files a/docs/src/pages/docs/_assets/openrouter.png and b/docs/src/pages/docs/_assets/openrouter.png differ diff --git a/docs/src/pages/docs/_assets/quick-start-03.png b/docs/src/pages/docs/_assets/quick-start-03.png index 49cf3064d..3938f0fd1 100644 Binary files a/docs/src/pages/docs/_assets/quick-start-03.png and b/docs/src/pages/docs/_assets/quick-start-03.png differ diff --git a/docs/src/pages/docs/_assets/settings-04.png b/docs/src/pages/docs/_assets/settings-04.png index 72198ced5..f2dc4b2ec 100644 Binary files a/docs/src/pages/docs/_assets/settings-04.png and b/docs/src/pages/docs/_assets/settings-04.png differ diff --git a/docs/src/pages/docs/_assets/settings-11.png b/docs/src/pages/docs/_assets/settings-11.png index 8f44e83ec..2fb3fe441 100644 Binary files a/docs/src/pages/docs/_assets/settings-11.png and b/docs/src/pages/docs/_assets/settings-11.png differ diff --git a/docs/src/pages/docs/_assets/sys_monitor.png b/docs/src/pages/docs/_assets/sys_monitor.png new file mode 100644 index 000000000..85a8676a5 Binary files /dev/null and b/docs/src/pages/docs/_assets/sys_monitor.png differ diff --git a/docs/src/pages/docs/_assets/together.png b/docs/src/pages/docs/_assets/together.png new file mode 100644 index 000000000..bc2fd9d49 Binary files /dev/null and b/docs/src/pages/docs/_assets/together.png differ diff --git a/docs/src/pages/docs/_assets/trouble-shooting-01.png b/docs/src/pages/docs/_assets/trouble-shooting-01.png index 79a6ff0d6..22d1a6d68 100644 Binary files a/docs/src/pages/docs/_assets/trouble-shooting-01.png and b/docs/src/pages/docs/_assets/trouble-shooting-01.png differ diff --git 
a/docs/src/pages/docs/_assets/trouble-shooting-02.png b/docs/src/pages/docs/_assets/trouble-shooting-02.png index 7c1a2d296..8c61e99d4 100644 Binary files a/docs/src/pages/docs/_assets/trouble-shooting-02.png and b/docs/src/pages/docs/_assets/trouble-shooting-02.png differ diff --git a/docs/src/pages/docs/_meta.json b/docs/src/pages/docs/_meta.json index 3f4deb69b..bf3ddb2fe 100644 --- a/docs/src/pages/docs/_meta.json +++ b/docs/src/pages/docs/_meta.json @@ -1,17 +1,30 @@ { - "-- Switcher": { - "type": "separator", - "title": "Switcher" - }, - "get-started": { - "title": "GET STARTED", + "index": "Overview", + "how-to-separator": { + "title": "HOW TO", "type": "separator" }, - "index": "Overview", - "quickstart": { - "title": "Quickstart" + "installation": "Install πŸ‘‹ Jan", + "threads": "Start Chatting", + "manage-models": "Manage Models", + "assistants": "Create Assistants", + + "tutorials-separators": { + "title": "TUTORIALS", + "type": "separator" }, - "desktop": "Installation", + "quickstart": "Quickstart", + "remote-models": "Connect to Remote Models", + "custom-providers": "Add Custom Providers", + "server-examples": "Provide AI to Tools", + "mcp": "Model Context Protocol", + + "explanation-separator": { + "title": "EXPLANATION", + "type": "separator" + }, + "llama-cpp": "Local AI Engine", + "api-server": "Server Overview", "data-folder": "Jan Data Folder", "privacy": "Privacy", "privacy-policy": { @@ -19,39 +32,12 @@ "display": "hidden", "title": "Privacy Policy" }, - "user-guides": { - "title": "BASIC USAGE", + + "reference-separator": { + "title": "REFERENCE", "type": "separator" }, - "models": "Models", - "tools": "Tools", - "assistants": "Assistants", - "threads": "Chats", "settings": "Settings", - "api-server": "Local API Server", - "inference-engines": { - "title": "ENGINES", - "type": "separator" - }, - "local-engines": "Local Engines", - "remote-models": "Remote Engines", - "install-engines": "Install Engines", - "extensions-separator": { - "title": "EXTENSIONS", - "type": "separator" - }, - "extensions": "Overview", - "extensions-settings": "Extension Settings", - "configure-extensions": "Configure Extensions", - "install-extensions": "Install Extensions", - "troubleshooting-separator": { - "title": "TROUBLESHOOTING", - "type": "separator" - }, "troubleshooting": "Troubleshooting", - "error-codes": { - "type": "page", - "display": "hidden", - "title": "Error Codes" - } + "model-parameters": "Model Parameters" } diff --git a/docs/src/pages/docs/api-server.mdx b/docs/src/pages/docs/api-server.mdx index 0be8a59a1..4f917dd66 100644 --- a/docs/src/pages/docs/api-server.mdx +++ b/docs/src/pages/docs/api-server.mdx @@ -21,13 +21,13 @@ import { Settings, EllipsisVertical } from 'lucide-react' # Local API Server -Jan includes a built-in API server that is compatible with OpenAI's API specification, allowing you to interact with AI models through a local HTTP interface. This means you can use Jan as a drop-in replacement for OpenAI's API, but running entirely on your computer. +Jan includes a built-in API server that is compatible with OpenAI's API specification, allowing you to +interact with AI models through a local HTTP interface. This means you can use Jan as a drop-in replacement +for OpenAI's API, but running entirely on your computer. -Jan uses **Cortex** as its core engine for running AI models. If you need a standalone API server without Jan's desktop interface (for example, in server environments or for command-line usage), you can use Cortex directly: https://cortex.so. 
- - -Full API documentation is available at [Cortex's API Reference](https://cortex.so/api-reference#tag/chat). - +Jan uses **llama.cpp** as its core engine for running AI models. If you need a standalone API server without +Jan's desktop interface (for example, in server environments or for command-line usage), you can run the +`llama-server` binary that ships with llama.cpp directly; download it from [here](https://github.com/ggml-org/llama.cpp).
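+
+As a rough sketch (the model path below is a placeholder; point it at any GGUF file you have downloaded),
+serving a local model with that standalone binary looks like this:
+
+```sh
+# Expose an OpenAI-compatible API on port 8080 from a local GGUF model
+llama-server -m ./models/llama-3.2-1b-instruct-q8_0.gguf --port 8080
+```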
![Local API Server](./_assets/api-server.png) @@ -39,7 +39,7 @@ ### Step 1: Start Server 1. Navigate to the **Local API Server** -2. Configure [Server Settings](/docs/api-server#server-settings) +2. Add an API key (it can be anything) or fully configure the server at [Server Settings](/docs/api-server#server-settings) 3. Click the **Start Server** button 4. Wait for the confirmation message in the logs panel; your server is ready when you see: `JAN API listening at: http://127.0.0.1:1337` @@ -48,21 +48,20 @@ The easiest way to test your server is through the API Playground: 1. Click the **API Playground** button to open its testing interface 2. Select a model from the dropdown menu in the Jan interface -3. Try a simple [chat completion](https://cortex.so/api-reference#tag/chat/post/v1/chat/completions) request +3. Try a simple request 4. View the response in real-time -### Step 3: Use the API -Navigate to [Cortex's API Reference](https://cortex.so/api-reference#tag/chat) to see full API endpoints for your use case. +### Step 3: Use the API +Point any OpenAI-compatible client at `http://localhost:1337/v1`. For example, chat requests go to +`http://localhost:1337/v1/chat/completions`, with the model name exactly as it appears in Jan. ## Server Settings #### Host Address Options -- **127.0.0.1 (Recommended)**: - Only accessible from your computer - Most secure option for personal use -- **0.0.0.0**: - Makes server accessible from other devices on your network - Use with caution and only when necessary @@ -100,24 +99,23 @@ Enable **Verbose Server Logs** for detailed error messages. **1. Common Issues** - - Confirm the server is running - - Check if model is successfully loaded in Jan - - - Check if the port is already in use by another application - - Verify you have admin/sudo rights if needed - - Make sure your API endpoint matches your server settings. Example: Using `http://localhost:1337` when you set a different port. - - Make sure the model name in your API request matches exactly what's shown in Jan. Example: If you selected "Llama 3.2 1B Instruct Q8" in Jan, use `llama3.2-1b-instruct` in your API request. - - Verify your JSON request format is correct - - Verify firewall settings - - Look for detailed error messages in the logs +- Confirm the server is running +- Check if the model is successfully loaded in Jan +- Check if the port is already in use by another application +- Verify you have admin/sudo rights if needed +- Make sure your API endpoint matches your server settings. Example: Using `http://localhost:1337` when you set a different port. +- Make sure the model name in your API request matches exactly what's shown in Jan. Example: If you selected "Llama 3.2 1B Instruct Q8" in Jan, use `llama3.2-1b-instruct` in your API request. +- Verify your JSON request format is correct +- Verify firewall settings +- Look for detailed error messages in the logs **2. CORS Errors in Web Apps** - - Enable CORS in server settings if using from a webpage - - Verify the origin of the request - - Verify your web app's request URL matches the server address exactly - - Check browser console for specific error messages +- Enable CORS in server settings if using from a webpage +- Verify the origin of the request +- Verify your web app's request URL matches the server address exactly +- Check browser console for specific error messages **3.
Performance Issues** - - Monitor system resources (CPU, RAM, and GPU usage) - - Try to reduce the context length or `ngl` (number of GPU layers) - - Check for other resource-intensive applications +- Monitor system resources (CPU, RAM, and GPU usage) +- Try to reduce the context length or `ngl` (number of GPU layers) +- Check for other resource-intensive applications diff --git a/docs/src/pages/docs/assistants.mdx b/docs/src/pages/docs/assistants.mdx index de61ec7f7..8625252e8 100644 --- a/docs/src/pages/docs/assistants.mdx +++ b/docs/src/pages/docs/assistants.mdx @@ -21,21 +21,23 @@ import { Callout, Steps } from 'nextra/components' # Assistants -Jan allows you to manage multiple Assistants, each with its own configuration profile that determines how the AI should behave and respond to your inputs. You can add, edit, or delete assistants, and customize their instructions and settings. +Jan lets you give models specific sets of instructions without having to repeat yourself. We call these +combinations of a model and your instructions Assistants. Each assistant can also have its own configuration, +which helps guide how the AI model should behave and respond to your inputs. You can add, edit, or delete +assistants, and customize their instructions and settings from the Assistants tab. -![Assistants UI Overview](./_assets/assistants-ui-overview.png) +![The Assistants management page, where you can view, add, edit, or delete assistants. Each assistant has a name, +description, and can be customized for different tasks.](./_assets/assistants-ui-overview.png) -*Screenshot: The Assistants management page, where you can view, add, edit, or delete assistants. Each assistant has a name, description, and can be customized for different tasks.* - -## Accessing the Assistants Page +To find the Assistants tab: 1. Open Jan and look at the left sidebar. 2. Click on the **Assistants** tab (see highlighted section in the screenshot above). -3. The main panel will display all your current assistants +3. The main panel will display all your current assistants. ## Managing Assistants -- **Add a New Assistant**: Click the `+` button in the Assistants panel to create a new assistant profile. +- **Add a New Assistant**: Click the `+` button in the Assistants panel to create a new assistant with your instructions. - **Edit an Assistant**: Click the pencil (✏️) icon on any assistant card to update its name, description, or instructions. - **Delete an Assistant**: Click the trash (πŸ—‘οΈ) icon to remove an assistant you no longer need. @@ -44,7 +46,7 @@ Jan allows you to manage multiple Assistants, each with its own configuration pr Each assistant can have its own set of instructions to guide its behavior. For example: ``` -Act as a software development mentor focused on Python and JavaScript. +Act as a software engineering mentor focused on Python and JavaScript. Provide detailed explanations with code examples when relevant. Use markdown formatting for code blocks. ``` @@ -56,23 +58,30 @@ Respond in a casual, friendly tone. Keep explanations brief and use simple langu Provide examples when explaining complex topics. ``` ## Best Practices - Be clear and specific about the desired behavior for each assistant. - Include preferences for formatting, tone, or style. +- Include examples to increase the model's compliance with your request.
- Use different assistants for different tasks (e.g., translation, travel planning, financial advice). ---- - -*Note: The ability to create, edit, and delete assistants is available in the Assistants tab. Each assistant can be tailored for a specific use case, making Jan a flexible and powerful tool for your needs.* ## Switching and Managing Assistants in Chat -You can quickly switch between assistants, or create and edit them, directly from the Chat screen using the assistant dropdown menu at the top: +You can quickly switch between assistants, or create and edit them, directly from the Chat screen using the +assistant dropdown menu at the top: ![Assistant Dropdown](./_assets/assistant-dropdown.png) -- Click the assistant name (e.g., "Travel Planner") at the top of the Chat screen to open the dropdown menu. -- The dropdown lists all your assistants. Click any assistant to switch to it for the current chat session. +- Click the assistant's name (e.g., "Travel Planner") at the top of the Chat screen to open the dropdown menu. +- The dropdown lists all of your assistants. Click any assistant to switch to it for the current chat session. - To create a new assistant, select **Create Assistant** at the bottom of the dropdown. This opens the Add Assistant dialog: ![Add Assistant Dialog](./_assets/assistant-add-dialog.png) @@ -82,10 +91,11 @@ You can quickly switch between assistants, or create and edit them, directly fro ![Edit Assistant Dialog](./_assets/assistant-edit-dialog.png) ### Add/Edit Assistant Dialogs -- Set an emoji and name for your assistant. +- Set an (optional) emoji and name for your assistant. - Optionally add a description. - Enter detailed instructions to guide the assistant's behavior. -- Adjust predefined parameters (like Temperature, Top P, etc.) or add custom parameters as needed. +- Adjust the predefined parameters (like Temperature, Top P, etc.) or add custom parameters as needed. - Click **Save** to apply your changes. -This workflow allows you to seamlessly manage and switch between assistants while chatting, making it easy to tailor Jan to your needs in real time. \ No newline at end of file +This workflow allows you to seamlessly manage and switch between assistants while chatting, making it easy to tailor +Jan to your needs in real time. diff --git a/docs/src/pages/docs/configure-extensions.mdx b/docs/src/pages/docs/configure-extensions.mdx deleted file mode 100644 index 71d226554..000000000 --- a/docs/src/pages/docs/configure-extensions.mdx +++ /dev/null @@ -1,301 +0,0 @@ ---- -title: Configure Extensions -description: Learn about Jan's default extensions and explore how to configure them. - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Jan Extensions, - Extensions, - ] ---- - -# Configure Extensions -Extensions in Jan are configured through a JSON file that defines their behavior and metadata. While most users won't need to modify this file directly, advanced users can customize extension settings for specific needs. - -## Access extensions.json - -1. Navigate to [Jan Data Folder](/docs/data-folder): 2.
Open the `extensions.json` file in any text editor - -## Configuration Options - -| Option | Description | Example | -|--------|-------------|---------| -| `_active` | Enable/disable the extension | `true` or `false` | -| `listeners` | Event listeners configuration | `{}` for default | -| `origin` | Installation path of the extension | `"C:\\Users\\...\\jan\\resources\\app.asar.unpacked\\..."` | -| `installOptions` | Installation configuration | `{"version": false, "fullMetadata": true}` | -| `name` | Internal extension identifier | `"@janhq/conversational-extension"` | -| `productName` | Display name shown in UI | `"Conversational"` | -| `version` | Extension version number | `"1.0.0"` | -| `main` | Entry point file path | `"dist/index.js"` | -| `description` | Extension description | `"This extension enables conversations..."` | -| `url` | Extension repository URL | `"extension://@janhq/conversational-extension/dist/index.js"` | - -## Full Example Configuration - -```json title="~/jan/data/extensions/extensions.json" -{ - "@janhq/conversational-extension": { - "_active": true, - "listeners": {}, - "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-conversational-extension-1.0.0.tgz", - "installOptions": { - "version": false, - "fullMetadata": true - }, - "name": "@janhq/conversational-extension", - "productName": "Conversational", - "version": "1.0.0", - "main": "dist/index.js", - "description": "This extension enables conversations and state persistence via your filesystem.", - "url": "extension://@janhq/conversational-extension/dist/index.js" - }, - "@janhq/inference-anthropic-extension": { - "_active": true, - "listeners": {}, - "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-anthropic-extension-1.0.2.tgz", - "installOptions": { - "version": false, - "fullMetadata": true - }, - "name": "@janhq/inference-anthropic-extension", - "productName": "Anthropic Inference Engine", - "version": "1.0.2", - "main": "dist/index.js", - "description": "This extension enables Anthropic chat completion API calls.", - "url": "extension://@janhq/inference-anthropic-extension/dist/index.js" - }, - "@janhq/inference-triton-trt-llm-extension": { - "_active": true, - "listeners": {}, - "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-triton-trt-llm-extension-1.0.0.tgz", - "installOptions": { - "version": false, - "fullMetadata": true - }, - "name": "@janhq/inference-triton-trt-llm-extension", - "productName": "Triton-TRT-LLM Inference Engine", - "version": "1.0.0", - "main": "dist/index.js", - "description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option.", - "url": "extension://@janhq/inference-triton-trt-llm-extension/dist/index.js" - }, - "@janhq/inference-mistral-extension": { - "_active": true, - "listeners": {}, - "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-mistral-extension-1.0.1.tgz", - "installOptions": { - "version": false, - "fullMetadata": true - }, - "name": "@janhq/inference-mistral-extension", - "productName": "MistralAI Inference Engine", - "version": "1.0.1", - "main": "dist/index.js", - "description": "This extension enables Mistral chat completion API calls.", - "url": "extension://@janhq/inference-mistral-extension/dist/index.js" - }, - "@janhq/inference-martian-extension": { - "_active": true, 
- "listeners": {}, - "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-martian-extension-1.0.1.tgz", - "installOptions": { - "version": false, - "fullMetadata": true - }, - "name": "@janhq/inference-martian-extension", - "productName": "Martian Inference Engine", - "version": "1.0.1", - "main": "dist/index.js", - "description": "This extension enables Martian chat completion API calls.", - "url": "extension://@janhq/inference-martian-extension/dist/index.js" - }, - "@janhq/inference-openrouter-extension": { - "_active": true, - "listeners": {}, - "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-openrouter-extension-1.0.0.tgz", - "installOptions": { - "version": false, - "fullMetadata": true - }, - "name": "@janhq/inference-openrouter-extension", - "productName": "OpenRouter Inference Engine", - "version": "1.0.0", - "main": "dist/index.js", - "description": "This extension enables Open Router chat completion API calls.", - "url": "extension://@janhq/inference-openrouter-extension/dist/index.js" - }, - "@janhq/inference-nvidia-extension": { - "_active": true, - "listeners": {}, - "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-nvidia-extension-1.0.1.tgz", - "installOptions": { - "version": false, - "fullMetadata": true - }, - "name": "@janhq/inference-nvidia-extension", - "productName": "NVIDIA NIM Inference Engine", - "version": "1.0.1", - "main": "dist/index.js", - "description": "This extension enables NVIDIA chat completion API calls.", - "url": "extension://@janhq/inference-nvidia-extension/dist/index.js" - }, - "@janhq/inference-groq-extension": { - "_active": true, - "listeners": {}, - "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-groq-extension-1.0.1.tgz", - "installOptions": { - "version": false, - "fullMetadata": true - }, - "name": "@janhq/inference-groq-extension", - "productName": "Groq Inference Engine", - "version": "1.0.1", - "main": "dist/index.js", - "description": "This extension enables fast Groq chat completion API calls.", - "url": "extension://@janhq/inference-groq-extension/dist/index.js" - }, - "@janhq/inference-openai-extension": { - "_active": true, - "listeners": {}, - "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-openai-extension-1.0.2.tgz", - "installOptions": { - "version": false, - "fullMetadata": true - }, - "name": "@janhq/inference-openai-extension", - "productName": "OpenAI Inference Engine", - "version": "1.0.2", - "main": "dist/index.js", - "description": "This extension enables OpenAI chat completion API calls.", - "url": "extension://@janhq/inference-openai-extension/dist/index.js" - }, - "@janhq/inference-cohere-extension": { - "_active": true, - "listeners": {}, - "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-cohere-extension-1.0.0.tgz", - "installOptions": { - "version": false, - "fullMetadata": true - }, - "name": "@janhq/inference-cohere-extension", - "productName": "Cohere Inference Engine", - "version": "1.0.0", - "main": "dist/index.js", - "description": "This extension enables Cohere chat completion API calls.", - "url": "extension://@janhq/inference-cohere-extension/dist/index.js" - }, - "@janhq/model-extension": { - 
"_active": true, - "listeners": {}, - "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-model-extension-1.0.33.tgz", - "installOptions": { - "version": false, - "fullMetadata": true - }, - "name": "@janhq/model-extension", - "productName": "Model Management", - "version": "1.0.33", - "main": "dist/index.js", - "description": "Model Management Extension provides model exploration and seamless downloads.", - "url": "extension://@janhq/model-extension/dist/index.js" - }, - "@janhq/monitoring-extension": { - "_active": true, - "listeners": {}, - "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-monitoring-extension-1.0.10.tgz", - "installOptions": { - "version": false, - "fullMetadata": true - }, - "name": "@janhq/monitoring-extension", - "productName": "System Monitoring", - "version": "1.0.10", - "main": "dist/index.js", - "description": "This extension provides system health and OS level data.", - "url": "extension://@janhq/monitoring-extension/dist/index.js" - }, - "@janhq/assistant-extension": { - "_active": true, - "listeners": {}, - "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-assistant-extension-1.0.1.tgz", - "installOptions": { - "version": false, - "fullMetadata": true - }, - "name": "@janhq/assistant-extension", - "productName": "Jan Assistant", - "version": "1.0.1", - "main": "dist/index.js", - "description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models.", - "url": "extension://@janhq/assistant-extension/dist/index.js" - }, - "@janhq/tensorrt-llm-extension": { - "_active": true, - "listeners": {}, - "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-tensorrt-llm-extension-0.0.3.tgz", - "installOptions": { - "version": false, - "fullMetadata": true - }, - "name": "@janhq/tensorrt-llm-extension", - "productName": "TensorRT-LLM Inference Engine", - "version": "0.0.3", - "main": "dist/index.js", - "description": "This extension enables Nvidia's TensorRT-LLM for the fastest GPU acceleration. See the [setup guide](https://jan.ai/guides/providers/tensorrt-llm/) for next steps.", - "url": "extension://@janhq/tensorrt-llm-extension/dist/index.js" - }, - "@janhq/inference-cortex-extension": { - "_active": true, - "listeners": {}, - "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-cortex-extension-1.0.15.tgz", - "installOptions": { - "version": false, - "fullMetadata": true - }, - "name": "@janhq/inference-cortex-extension", - "productName": "Cortex Inference Engine", - "version": "1.0.15", - "main": "dist/index.js", - "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", - "url": "extension://@janhq/inference-cortex-extension/dist/index.js" - } -} -``` - -## Common Use Cases - -1. **Disable an Extension** - ```json - { - "@janhq/example-extension": { - "_active": false - // other settings... - } - } - ``` - -2. **Update Entry Point** - ```json - { - "@janhq/example-extension": { - "main": "new/path/index.js" - // other settings... 
- }
- }
-```
-
-## Important Notes
-
-- Always backup the `extensions.json` file before making changes
-- Invalid JSON syntax can prevent Jan from starting properly
-- Most users should manage extensions through Jan's Settings UI
-- Changes require restarting Jan to take effect
diff --git a/docs/src/pages/docs/install-engines.mdx b/docs/src/pages/docs/custom-providers.mdx
similarity index 61%
rename from docs/src/pages/docs/install-engines.mdx
rename to docs/src/pages/docs/custom-providers.mdx
index cded71c83..962f8040d 100644
--- a/docs/src/pages/docs/install-engines.mdx
+++ b/docs/src/pages/docs/custom-providers.mdx
@@ -1,6 +1,6 @@
 ---
-title: Install Engines
-description: Learn about Jan's default extensions and explore how to configure them.
+title: Adding New Providers
+description: Learn how to add new providers to Jan.
 [
   Jan,
   Customizable Intelligence, LLM,
   local AI,
   privacy focus,
   free and open source,
   private and offline,
   conversational AI,
   no-subscription fee,
   large language models,
 ]
@@ -19,19 +19,104 @@ description: Learn how to add new providers to Jan.
 import { Callout } from 'nextra/components'
 import { Settings, EllipsisVertical } from 'lucide-react'
 
-# Install Engines
+# Adding New Providers
+
+Any local or remote model provider with an OpenAI-compatible API can be used with Jan, and both
+kinds are added in the same way.
+
+## Local
+
+Out of the box, Jan only runs GGUF files, but you can serve other model formats through an OpenAI API-compatible server like [vLLM](https://vllm.ai/)
+or [SGLang](https://github.com/sgl-project/sglang).
+
+### vLLM
+
+First, make sure you have [uv](https://docs.astral.sh/uv/) installed.
+
+Next, create a virtual environment to install vLLM in.
+
+```sh
+uv venv .venv --python 3.12
+```
+
+Activate your environment.
+```sh
+source .venv/bin/activate
+```
+
+Install `vllm` in it.
+```sh
+uv pip install vllm
+```
+
+Next, start a server and download a model at the same time, for example: `vllm serve Qwen/Qwen2.5-1.5B-Instruct` (vLLM fetches the model from Hugging Face on the first run).
+
+### SGLang
+
+Create a virtual environment to install SGLang in.
+
+```sh
+uv venv .venv --python 3.12
+```
+
+Activate your environment.
+```sh
+source .venv/bin/activate
+```
+
+Install `sglang` in it.
+```sh
+uv pip install "sglang[all]"
+```
+
+Next, start a server and download a model at the same time, for example: `python -m sglang.launch_server --model-path Qwen/Qwen2.5-1.5B-Instruct` (the model is fetched from Hugging Face on the first run).
 
-## Install Local Engines
-Jan currently doesn't support installing a local engine yet.
 
 ## Install Remote Engines
 
-### Step-by-step Guide
-You can add any OpenAI API-compatible providers like OpenAI, Anthropic, or others.
-To add a new remote engine:
+You can add any OpenAI API-compatible provider, such as Together AI, Fireworks AI, and others. Let's walk through
+some examples.
+
+### Together AI
+
+Create an account or log in to your existing [Together AI](https://together.ai) dashboard.
+
+![Together's Dashboard](./_assets/together.png)
+
+Click on **View all models**.
+
+Pick a free model like `deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free`.
+
+Where it says **Run Inference**, click on `curl` and grab:
+
+- `url`: `https://api.together.xyz/v1/chat/completions`
+- `model`: `deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free`
+
+Get your API key.
+
+Go back to Jan, open **Settings > Model Providers**, and click **Add Provider**.
+
+Name it Together and proceed.
+
+Add your API key and change the URL.
+
+Enter the **model ID** by clicking on the `+` sign where it says **Models**.
+
 1. Navigate to **Settings** () > **Engines**
-1. At **Remote Engine** category, click **+ Install Engine**
+1. At **Remote Engine** category, click **+ Install Engine**
![Install Remote Engines](./_assets/install-engines-01.png) @@ -57,7 +142,7 @@ To add a new remote engine: > - The conversion functions are only needed for providers that don't follow the OpenAI API format. For OpenAI-compatible APIs, you can leave these empty. > - For OpenAI-compatible APIs like OpenAI, Anthropic, or Groq, you only need to fill in the required fields. Leave optional fields empty. -4. Click **Install** +4. Click **Install** 5. Once completed, you should see your engine in **Engines** page: - You can rename or uninstall your engine - You can navigate to its own settings page @@ -78,7 +163,7 @@ Here's how to set up OpenAI as a remote engine: #### Custom APIs Setup -If you're integrating an API that doesn't follow OpenAI's format, you'll need to use the conversion functions. +If you're integrating an API that doesn't follow OpenAI's format, you'll need to use the conversion functions. Let's say you have a custom API with this format: ```javascript @@ -107,53 +192,51 @@ Transform response template: your transform response template here ``` 1. Header template -``` +```json "Authorization: Bearer {{api_key}}" ``` 2. Transform request template: Convert from Jan's OpenAI-style format to your API's format -``` + +```json "chat_completions": { "url": "https://api.custom_endpoint.com/v1/messages", "template": "{ {% for key, value in input_request %} - {% if key == "messages" %} + {% if key == "messages" %} "prompt": "{{ last(input_request.messages).content }}" {% else if key == "max_tokens" or key == "temperature" %} - "{{ key }}": {{ tojson(value) }} + "{{ key }}": {{ tojson(value) }} {% endif %} {% endfor %} }" } ``` - 3. Transform response template Convert from your API's format back to OpenAI-style format -``` + +```json "chat_completions": { -"template": "{ -{ - "choices": [{ - "message": { - "role": "assistant", - "content": "{{ input_request.generated_text }}" + "template": { + "choices": [{ + "message": { + "role": "assistant", + "content": "{{ input_request.generated_text }}" + } + }], + "usage": { + "total_tokens": "{{ input_request.tokens_used }}" } - }], - "usage": { - "total_tokens": {{ input_request.tokens_used }} - } -} -}" + } } ``` - - **Expected Formats:** 1. Jan's Request Format -``` + +```json { "messages": [ {"role": "user", "content": "What is AI?"} @@ -164,7 +247,8 @@ Convert from your API's format back to OpenAI-style format ``` 2. Jan's Expected Response Format -``` + +```json { "choices": [{ "message": { @@ -177,17 +261,3 @@ Convert from your API's format back to OpenAI-style format } } ``` - - -Make sure to test your conversion functions thoroughly. Incorrect conversions may cause errors or unexpected behavior. - - - - - - - - - - - diff --git a/docs/src/pages/docs/data-folder.mdx b/docs/src/pages/docs/data-folder.mdx index 5abe68237..acbbb025b 100644 --- a/docs/src/pages/docs/data-folder.mdx +++ b/docs/src/pages/docs/data-folder.mdx @@ -32,9 +32,11 @@ Jan stores your data locally in JSON format. Your data is yours alone. Via Jan: 1. Settings () > Advanced Settings 2. Click +
![Open Jan Data Folder](./_assets/settings-11.png)
+ Via Terminal: ```bash diff --git a/docs/src/pages/docs/desktop.mdx b/docs/src/pages/docs/desktop.mdx deleted file mode 100644 index ba59ae0da..000000000 --- a/docs/src/pages/docs/desktop.mdx +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: Installation -description: Jan is a ChatGPT-alternative that runs on your computer, with a local API server. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Hardware Setup, - GPU, - ] ---- - -import { Cards, Card } from 'nextra/components' -import childPages from './desktop/_meta.json'; - -# Installation - -
- - ( - - ))} -/> \ No newline at end of file diff --git a/docs/src/pages/docs/error-codes.mdx b/docs/src/pages/docs/error-codes.mdx deleted file mode 100644 index 7a3bde580..000000000 --- a/docs/src/pages/docs/error-codes.mdx +++ /dev/null @@ -1,101 +0,0 @@ ---- -title: Error Codes -description: Learn Jan application's error codes and how to solve them. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - troubleshooting, - error codes, - broken build, - something amiss, - unexpected token, - undefined issue, - permission denied, - ] ---- - -import { Tabs } from 'nextra/components' -import { Callout } from 'nextra/components' - -# Error Codes -This article summarizes error codes in the Jan application, categorized by application feature. Each error includes a unique code and a **Prefix** indicating its category. -The following sections detail the error categories, their descriptions, and the error types. - -See the Solution column for error troubleshooting. - -## Error Code Categories - -### 1. Installation Errors -These errors relate to issues encountered during installation, including desktop and server setups. -- **Prefix**: INST - -### 2. Hardware Setup Errors -These errors relate to configuring and setting up hardware components. -- **Prefix**: HW -#### Error Types -| Error Code | Cause | Solution | -|------------|-----------------------------------------|-------------------------------------------------------------------------------------------| -| HW-1 | The CUDA toolkit may be unavailable. | [Troubleshooting Nvidia GPU](/docs/troubleshooting#1-ensure-gpu-mode-requirements) | -| HW-2 | Problem with Nvidia drivers. | [Troubleshooting Nvidia GPU](/docs/troubleshooting#troubleshooting-nvidia-gpu) | - - -### 3. Architecture Errors -These errors relate to problems with the overall system architecture and configuration setups. -- **Prefix**: ARCH - -### 4. Basic Usage Errors -These errors occur during the basic usage of the application, including issues with models, assistants, and tools. -- **Prefix**: USG -#### Error Types -| Error Code | Cause | Solution | -|------------|-----------------------------------|--------------------------------------| -| USG-1 | Model is currently unavailable. | [How to Use Model](/docs/models) | - - -### 5. Advanced Settings Errors -These errors relate to advanced settings and configurations within the application. -- **Prefix**: ADV - -### 6. Inference Engine Errors -These errors relate to inference engines, both local and remote. -- **Prefix**: IE -#### Error Types -| Error Code | Cause | Solution | -|------------|---------------------------------------------|---------------------------------------------------------------------------------------------------------------| -| IE-2 | The OpenAI model's API key is invalid. | [How to Integrate OpenAI API](/docs/remote-inference/openai#how-to-integrate-openai-api-with-jan) | -| IE-3 | The Groq model's API key is invalid. | [How to Integrate Groq API](/docs/remote-inference/groq#how-to-integrate-groq-api-with-jan) | -| IE-4 | The Mistral model's API key is invalid. | [How to Integrate Mistral API](/docs/remote-inference/mistralai) | -| IE-5 | The OpenRouter model's API key is invalid. | [How to Integrate OpenRouter API](/docs/remote-inference/openrouter) | - -### 7. Local API Server Errors -These errors relate to the local API server's functionality. 
-- **Prefix**: API -#### Error Types -| Error Code | Cause | Solution | -|------------|----------------------------------------|--------------------------------------------------------------| -| API-1 | Port 39291 is currently unavailable. | [Local API Server Guide](/docs/local-api#step-1-set-the-local-server) | - - -### 8. Extensions and Integration Errors -These errors relate to integrating the application with external systems or extensions. -- **Prefix**: EXT - -### 9. Troubleshooting Errors -These errors occur during the troubleshooting processes and procedures. -- **Prefix**: TRO - -### 10. Unclear Errors -These errors don't classify into the standard categories above, making their causes difficult to identify. -#### Error Codes -| Error Title | Solution | -|-------------------|------------------| -| Something’s Amiss | [Troubleshooting Something’s Amiss](/docs/troubleshooting#somethings-amiss) | -| Undefined Issue | [Troubleshooting Undefined Issue](/docs/troubleshooting#undefined-issue) | diff --git a/docs/src/pages/docs/extensions-settings/_meta.json b/docs/src/pages/docs/extensions-settings/_meta.json deleted file mode 100644 index 100d720dd..000000000 --- a/docs/src/pages/docs/extensions-settings/_meta.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "model-management": { - "title": "Model Management", - "href": "/docs/extensions-settings/model-management" - }, - "system-monitoring": { - "title": "System Monitoring", - "href": "/docs/extensions-settings/system-monitoring" - } -} - \ No newline at end of file diff --git a/docs/src/pages/docs/extensions-settings/model-management.mdx b/docs/src/pages/docs/extensions-settings/model-management.mdx deleted file mode 100644 index 4fbe315d5..000000000 --- a/docs/src/pages/docs/extensions-settings/model-management.mdx +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: Model Management -description: Learn about Jan's default extensions and explore how to configure them. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Jan Extensions, - Extensions, - ] ---- - -import { Callout } from 'nextra/components' -import { Settings, EllipsisVertical, Plus, FolderOpen, Pencil } from 'lucide-react' - -# Model Management - -This extension configures how Jan handles model downloads and management: -- Model discovery and browsing -- Version control & configuration handling -- Download management - -## Hugging Face Access Token - -Access tokens authenticate your identity to Hugging Face Hub for model downloads. -1. Get your token from [Hugging Face Tokens](https://huggingface.co/docs/hub/en/security-tokens) -2. Enter your token in **Settings > Model Providers > Llama.cpp > Hugging Face Access Token** - - -Keep your access tokens secure and never share them. - - -
-![Model Management](../_assets/extensions-02.png) -
\ No newline at end of file diff --git a/docs/src/pages/docs/extensions-settings/system-monitoring.mdx b/docs/src/pages/docs/extensions-settings/system-monitoring.mdx deleted file mode 100644 index f506842e6..000000000 --- a/docs/src/pages/docs/extensions-settings/system-monitoring.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: System Monitoring -description: Learn about Jan's default extensions and explore how to configure them. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Jan Extensions, - Extensions, - ] ---- -import { Callout } from 'nextra/components' -import { Settings, EllipsisVertical } from 'lucide-react' - - -# System Monitoring -Provides system health and OS level data: -- Hardware utilization tracking -- Performance monitoring -- Error logging - -You can configure your logs in Jan in **Settings** () > **Core Extensions** > **System Monitoring**: - - -## Enable App Logs -Jan can save logs locally on your computer for: -- Debugging model issues -- Crash reports -- Download troubleshooting - -To enable, toggle on **Enable App Logs**: - -
-![Model Management](../_assets/extensions-03.png) -
- - -## Log Cleaning Interval -Set automatic log deletion interval in milliseconds: -- Default: 120000 (2 minutes); however, there's no minimum or maximum intervals -- Controls disk space usage -- Prevents log accumulation - - -You can clear Jan logs manually with [Clear logs](/docs/settings#log-management) in **Privacy**. - - diff --git a/docs/src/pages/docs/extensions.mdx b/docs/src/pages/docs/extensions.mdx deleted file mode 100644 index aa612a940..000000000 --- a/docs/src/pages/docs/extensions.mdx +++ /dev/null @@ -1,75 +0,0 @@ ---- -title: Extensions Overview -description: Learn about Jan's default extensions and explore how to configure them. - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Jan Extensions, - Extensions, - ] ---- - -import { Callout } from 'nextra/components' -import { Settings, EllipsisVertical } from 'lucide-react' - - -# Extensions - -## Overview -Extensions are modular components that add functionality to Jan. Each extension is designed to handle specific features. - -Extensions can be managed through **Settings** () > **Extensions**: - -
-![Remove Model](./_assets/extensions-01.png) -
- -## Core Extensions - -### Cortex -The primary extension that manages both **local** and **remote engines** capabilities: - -#### Local Engines -[llama.cpp](/docs/local-engines/llama-cpp): Fast, efficient local inference engine that runs GGUF models directly on your device. Powers Jan's default local AI capabilities with support for multiple hardware configurations. - -#### Remote Engines -- [Anthropic](/docs/remote-models/anthropic): Access Claude models -- [Cohere](/docs/remote-models/cohere): Access Cohere's models -- [Groq](/docs/remote-models/groq): High-performance inference -- [Martian](/docs/remote-models/martian): Specialized model access -- [MistralAI](/docs/remote-models/mistralai): Access Mistral models -- [NVIDIA NIM](/docs/remote-models/nvidia-nim) (NVIDIA Inference Microservices): Platform for deploying and serving GPU-accelerated AI models, providing enterprise-grade reliability and scalability. -- [OpenAI](/docs/remote-models/openai): Access GPT models -- [OpenRouter](/docs/remote-models/openrouter): Multi-provider model access -- [Triton-TRT-LLM](/docs/remote-models/triton): High-performance inference backend using NVIDIA Triton Inference Server with TensorRT-LLM optimization, designed for large-scale model deployment. - - - -### Jan Assistant -Enables assistants functionality, including Jan - the default assistant that can utilize all downloaded models. This extension manages: -- Default assistant configurations -- Model selection -- Conversation management - -### Conversational -Manages all chat-related functionality and data persistence: - - -### [Model Management](/docs/extensions-settings/model-management) -Provides model exploration and seamless downloads: -- Model discovery and browsing -- Version control & configuration handling -- Download management - -### [System Monitoring](/docs/extensions-settings/system-monitoring) -Provides system health and OS level data: -- Hardware utilization tracking -- Performance monitoring -- Error logging diff --git a/docs/src/pages/docs/index.mdx b/docs/src/pages/docs/index.mdx index f395d0ac7..da7cd9453 100644 --- a/docs/src/pages/docs/index.mdx +++ b/docs/src/pages/docs/index.mdx @@ -1,6 +1,6 @@ --- title: Jan -description: Jan is an open-source, self-hosted alternative to OpenAI's platform - build and run AI on your own desktop or server. +description: Jan is an open-source AI assistant and self-hosted AI platform - build and run AI on your own desktop or server. keywords: [ Jan, @@ -25,37 +25,35 @@ import FAQBox from '@/components/FaqBox' ![Jan's Cover Image](./_assets/jan-app.png) -Jan is an AI chat application that runs 100% offline on your desktop & mobile (*coming soon*). Our goal is to +Jan is an AI chat application that runs 100% offline on your desktop and (*soon*) on mobile. Our goal is to make it easy for anyone, with or without coding skills, to download and use AI models with full control and [privacy](https://www.reuters.com/legal/legalindustry/privacy-paradox-with-ai-2023-10-31/). -Jan is powered by [Cortex](https://cortex.so/), our embeddable local AI engine which provides an OpenAI-compatible -API that can run in the background at `https://localhost:1337` (or a custom port). This enables you to power other -applications running locally with AI capabilities. For example, you can connect tools like [Continue.dev](https://jan.ai/integrations/coding/vscode) -and [Cline](https://cline.bot/), or any OpenAI-compatible app, to Jan and start coding on their supported editors using -models hosted in Jan. 
+Jan is powered by [Llama.cpp](https://github.com/ggerganov/llama.cpp), a local AI engine that provides an OpenAI-compatible
+API server running in the background at `https://localhost:1337` (or your custom port) by default. This enables you to power all sorts of
+applications with AI capabilities from your laptop/PC. For example, you can connect local tools like [Continue](https://jan.ai/integrations/coding/vscode)
+and [Cline](https://cline.bot/) to Jan and power them using your favorite models.
 
-Jan doesn't limit you to locally hosted models, meaning, you can create an API key from your favorite model provider
-and add it to Jan via the configuration's page and start talking to your favorite paid models.
+Jan doesn't limit you to locally hosted models: you can create an API key from your favorite model provider,
+add it to Jan via the configuration page, and start talking to your favorite models.
 
 ### Features
 
-- Download popular open-source LLMs (Llama3, Gemma3, Mistral, and more) from the HuggingFace [Model Hub](./docs/models/manage-models.mdx)
-or import any GGUF models available locally
-- Connect to [cloud model services](/docs/remote-models/openai) (OpenAI, Anthropic, Mistral, Groq, etc.)
+- Download popular open-source LLMs (Llama3, Gemma3, Qwen3, and more) from the HuggingFace [Model Hub](./docs/models/manage-models.mdx)
+or import any GGUF files (the model format used by llama.cpp) available locally
+- Connect to [cloud services](/docs/remote-models/openai) (OpenAI, Anthropic, Mistral, Groq, etc.)
 - [Chat](./docs/threads.mdx) with AI models & [customize their parameters](./docs/models/model-parameters.mdx) via our intuitive interface
-- Use our [local API server](https://jan.ai/api-reference) with an OpenAI-equivalent API
-- Customize Jan with [extensions](/docs/extensions)
+- Use our [local API server](https://jan.ai/api-reference) with an OpenAI-equivalent API to power other apps.
 
 ### Philosophy
 
 Jan is built to be [user-owned](about#-user-owned), this means that Jan is:
-- Truly open source via the [AGPLv3 license](https://github.com/menloresearch/jan/blob/dev/LICENSE)
+- Truly open source via the [Apache 2.0 license](https://github.com/menloresearch/jan/blob/dev/LICENSE)
 - [Data is stored locally, following one of the many local-first principles](https://www.inkandswitch.com/local-first)
-- Runs 100% offline, with privacy by default
+- Internet is optional; Jan can run 100% offline
 - Free choice of AI models, both local and cloud-based
-- We do not collect or sell user data. See our [Privacy](/privacy).
+- We do not collect or sell user data. See our [Privacy Policy](./privacy).
 
 You can read more about our [philosophy](/about#philosophy) here.
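To make this concrete, here is a minimal sketch of calling the local server from the command line. It assumes Jan's API server is enabled on the default port; the model ID below is a placeholder, so substitute one you have actually downloaded.

```bash
# Chat with a local model through Jan's OpenAI-compatible endpoint.
# Assumes the server is running on the default port 1337; replace the
# "model" value with the ID of a model you have downloaded in Jan.
curl http://localhost:1337/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "llama3.2-3b-instruct",
    "messages": [{"role": "user", "content": "Hello from the local API!"}]
  }'
```

Any client that speaks the OpenAI API, including the Continue and Cline integrations mentioned above, can be pointed at the same base URL.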
@@ -67,17 +65,15 @@ Jan is inspired by the concepts of [Calm Computing](https://en.wikipedia.org/wik
 
 ## Acknowledgements
 
-Jan is built on the shoulders of many upstream open-source projects:
+Jan is built on the shoulders of many open-source projects like:
 - [Llama.cpp](https://github.com/ggerganov/llama.cpp/blob/master/LICENSE)
-- [LangChain.js](https://github.com/langchain-ai/langchainjs/blob/main/LICENSE)
-- [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM/blob/main/LICENSE)
 - [Scalar](https://github.com/scalar/scalar)
 
 ## FAQs
 
-  Jan is a customizable AI assistant that runs offline on your computer - a privacy-focused alternative to tools like
+  Jan is a customizable AI assistant that can run offline on your computer - a privacy-focused alternative to tools like
   ChatGPT, Anthropic's Claude, and Google Gemini, with optional cloud AI support.
 
@@ -87,8 +83,10 @@
 
-  Jan supports all major operating systems, [Mac](/docs/desktop/mac#compatibility), [Windows](/docs/desktop/windows#compatibility),
-  and [Linux](docs/desktop/linux).
+  Jan supports all major operating systems:
+  - [Mac](/docs/desktop/mac#compatibility)
+  - [Windows](/docs/desktop/windows#compatibility)
+  - [Linux](/docs/desktop/linux)
 
   Hardware compatibility includes:
   - NVIDIA GPUs (CUDA)
 
@@ -109,11 +107,11 @@
 
   You can optionally share anonymous usage statistics to help improve Jan, but your conversations are never
-  shared. See our complete [Privacy Policy](./docs/privacy.mdx).
+  shared. See our complete [Privacy Policy](./docs/privacy).
 
-  - Download optimized models from [Jan Hub](/docs/models/manage-models#1-download-from-jan-hub-recommended)
+  - Download optimized models from the [Jan Hub](/docs/models/manage-models#1-download-from-jan-hub-recommended)
   - Import GGUF models from Hugging Face or your local files
   - Connect to cloud providers like OpenAI, Anthropic, Mistral and Groq (requires your own API keys)
 
@@ -127,21 +125,16 @@
 
   Yes! Once you've downloaded a local model, Jan works completely offline with no internet connection needed.
 
-  Jan has an extensible architecture similar to VSCode and Obsidian. You can build custom features using our
-  [extensions API](/docs/extensions), which powers many of Jan's core features.
-
   - Join our [Discord community](https://discord.gg/qSwXFx6Krr) to connect with other users
   - Contribute through [GitHub](https://github.com/menloresearch/jan) (no permission needed!)
-  - Get troubleshooting help in our [Discord](https://discord.com/invite/FTk2MvZwJH) [#πŸ†˜|jan-help](https://discord.com/channels/1107178041848909847/1192090449725358130) channel
-  - Check our [Troubleshooting](./docs/troubleshooting.mdx) guide for common issues
+  - Get troubleshooting help in our [Discord](https://discord.com/invite/FTk2MvZwJH) channel [#πŸ†˜|jan-help](https://discord.com/channels/1107178041848909847/1192090449725358130)
+  - Check our [Troubleshooting](./docs/troubleshooting) guide for common issues
 
-  Yes! We fully support the self-hosted movement. Either [download Jan](./download.mdx) directly or fork and build
-  from our [GitHub repository](https://github.com/menloresearch/jan).
+  Yes! We fully support the self-hosted movement. Either download Jan directly or fork it on
+  [GitHub](https://github.com/menloresearch/jan) and build it from source.
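A sketch of the fork-and-build route mentioned in the last answer. The `make dev` target is an assumption taken from the repository README, so defer to the README for current instructions:

```bash
# Clone the repository (or your fork) and run Jan from source.
# `make dev` is assumed from the repository README; check the README
# if the build targets have changed.
git clone https://github.com/menloresearch/jan.git
cd jan
make dev
```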
diff --git a/docs/src/pages/docs/install-extensions.mdx b/docs/src/pages/docs/install-extensions.mdx deleted file mode 100644 index 53d535da4..000000000 --- a/docs/src/pages/docs/install-extensions.mdx +++ /dev/null @@ -1,143 +0,0 @@ ---- -title: Install Extensions -description: A step-by-step guide on installing an extension. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Jan Extensions, - Extensions, - ] ---- - -import { Callout } from 'nextra/components' -import { Settings, Plus } from 'lucide-react' - - -# Install Extensions - -Jan uses a modular extension system that allows developers to add new functionality to the application. By default, Jan comes with several [pre-installed extensions](/docs/extensions#core-extensions) that provide core functionalities. You can manually add custom third-party extensions at your own risk. - -## Prerequisites - -### Required Tools -- Node.js (version 20.0.0 or higher) -- Basic understanding of TypeScript - -### Jan's Architecture -Jan is built on: -- **Electron**: Powers the desktop application - - Extensions run in the main process - - Access to Node.js APIs and filesystem - - Cannot use browser-only libraries -- **Next.js**: Handles the user interface -- **Node.js**: Runs extension logic - -This architecture means your extensions: -- Can use Node.js packages and system APIs -- Need to handle client/server communication properly -- Should follow Electron's security practices - -## Create Extensions - - -Jan currently only accepts `.tgz` file format for extensions. - - -> **Heads Up:** -> - Use the following structure and setup as a **reference** only. -> - You're free to develop extensions using any approach or structure that works for your needs. Feel free to experiment and innovate. -> - If you already have your own `.tgz` extension file, please move forward to [install extension](/docs/install-extensions#install-extensions) step. - -### Extension Structure -Your extension should follow this basic structure: - -``` -my-extension/ -β”œβ”€β”€ package.json # Extension metadata and dependencies -β”œβ”€β”€ dist/ # Compiled JavaScript files -β”‚ └── index.js # Main extension entry point -β”œβ”€β”€ src/ # Source code -β”‚ └── index.ts # TypeScript source -└── README.md # Extension documentation -``` - -### Required package.json Fields -```json -{ - "name": "@your-org/extension-name", - "version": "1.0.0", - "main": "dist/index.js", - "types": "dist/index.d.ts", - "jan": { - "type": "extension", - "displayName": "Your Extension Name", - "description": "Description of what your extension does" - }, - "scripts": { - "build": "tsc", - "package": "npm pack" - }, - "dependencies": { - // List your dependencies - }, - "devDependencies": { - "typescript": "^5.0.0" - } -} -``` - -### Example Extension Template - -You can find a template for creating Jan extensions in our [example repository](https://github.com/menloresearch/extension-template). -## Install Extensions - -To install a custom extension in Jan: - -1. Open Jan, navigate to **Settings** () > **Extensions** -2. Click ** Install Extension** -3. Select your extension `.tgz` file & save -4. Restart Jan - -After restart, the `~/jan/data/extensions/extensions.json` file will be updated automatically to include your new extension. - -
-![Install Extensions](./_assets/extensions-04.png) -
- -## Troubleshooting -**Check Extensions Logs** - -```bash -# View application logs -~/jan/logs/app.log - -# Open Developer Tools -Mac: Cmd + Option + I -Windows/Linux: Ctrl + Shift + I -``` -**Common Error Patterns** - -1. Initialization Failures - - Extension fails to load/activate - - Verify package.json configuration - - Check extension dependencies - -2. Runtime Errors - - Node.js module errors - - API connection issues - - Authentication failures - -3. Build Problems -```bash -rm -rf dist/ -npm run build -``` - diff --git a/docs/src/pages/docs/desktop/_meta.json b/docs/src/pages/docs/installation/_meta.json similarity index 51% rename from docs/src/pages/docs/desktop/_meta.json rename to docs/src/pages/docs/installation/_meta.json index 5cc930af7..c1ea21ed1 100644 --- a/docs/src/pages/docs/desktop/_meta.json +++ b/docs/src/pages/docs/installation/_meta.json @@ -1,14 +1,14 @@ { "mac": { "title": "Mac", - "href": "/docs/desktop/mac" + "href": "/docs/installation/mac" }, "windows": { "title": "Windows", - "href": "/docs/desktop/windows" + "href": "/docs/installation/windows" }, "linux": { "title": "Linux", - "href": "/docs/desktop/linux" + "href": "/docs/installation/linux" } } diff --git a/docs/src/pages/docs/desktop/linux.mdx b/docs/src/pages/docs/installation/linux.mdx similarity index 86% rename from docs/src/pages/docs/desktop/linux.mdx rename to docs/src/pages/docs/installation/linux.mdx index bb5395bb9..7eacd67ce 100644 --- a/docs/src/pages/docs/desktop/linux.mdx +++ b/docs/src/pages/docs/installation/linux.mdx @@ -36,32 +36,36 @@ System requirements: + #### Debian-based (Supports `.deb` and `AppImage`) + - Debian - Ubuntu and derivatives: - Ubuntu Desktop LTS (official)/Ubuntu Server LTS (only for server) - - Edubuntu (Mainly desktop) - - Kubuntu (Desktop only) - - Lubuntu (Both desktop and server, though mainly desktop) - - Ubuntu Budgie (Mainly desktop) - - Ubuntu Cinnamon (Desktop only) - - Ubuntu Kylin (Both desktop and server) - - Ubuntu MATE (Desktop only) + - Edubuntu + - Kubuntu + - Lubuntu + - Ubuntu Budgie + - Ubuntu Cinnamon + - Ubuntu Kylin + - Ubuntu MATE + - Linux Mint + - Pop!_OS #### RHEL-based (Supports `.rpm` and `AppImage`) + - RHEL-based (Server only) - Fedora #### Arch-based -- Arch Linux (Mainly desktop) -- SteamOS (Desktop only) + +- Arch Linux +- SteamOS #### Independent -- openSUSE (Both desktop and server) - -Desktop or server support varies by distribution. - +- openSUSE + @@ -95,7 +99,7 @@ DDR3 or newer recommended. -Minimum 10GB free disk space required. +Minimum 10GB of free disk space required. @@ -145,22 +149,30 @@ Installation commands: + ##### dpkg + ```bash sudo dpkg -i jan-linux-amd64-{version}.deb ``` ##### apt-get + ```bash sudo apt-get install ./jan-linux-amd64-{version}.deb ``` + + +From the terminal, run the following commands: + ```bash chmod +x jan-linux-x86_64-{version}.AppImage ./jan-linux-x86_64-{version}.AppImage ``` + @@ -175,7 +187,7 @@ Default locations: # Custom installation directory $XDG_CONFIG_HOME = /home/username/custom_config -or +# or # Default installation directory ~/.config/Jan/data @@ -204,7 +216,7 @@ lspci | grep -i nvidia **NVIDIA Driver:** -1. Install [NVIDIA Driver](https://www.nvidia.com/en-us/drivers/) (version **470.63.01+**) +1. Install the [NVIDIA Driver](https://www.nvidia.com/en-us/drivers/), ideally via your package manager. 2. Verify: ```sh @@ -213,7 +225,7 @@ nvidia-smi **CUDA Toolkit:** -1. Install [CUDA toolkit](https://developer.nvidia.com/cuda-downloads) (**11.7+**) +1. 
Install the [CUDA toolkit](https://developer.nvidia.com/cuda-downloads), ideally from your package manager (**11.7+**) 2. Verify: ```sh @@ -245,7 +257,7 @@ CUDA offers better performance than Vulkan. Requires Vulkan support. -1. Navigate to **Settings** () > **Local Engine** > **Llama.cpp** +1. Navigate to **Settings** () > **Hardware** > **GPUs** 2. Select appropriate backend in **llama-cpp Backend**. Details in our [guide](/docs/local-engines/llama-cpp). @@ -253,7 +265,7 @@ Requires Vulkan support. Requires Vulkan support. -1. Navigate to **Settings** () > **Local Engine** > **Llama.cpp** +1. Navigate to **Settings** () > **Hardware** > **GPUs** 2. Select appropriate backend in **llama-cpp Backend**. Details in our [guide](/docs/local-engines/llama-cpp). @@ -275,9 +287,8 @@ rm -rf ~/.config/Jan/cache ```bash -sudo dnf remove jan -rm -rf ~/.config/Jan/data -rm -rf ~/.config/Jan/cache +rm jan-linux-x86_64-{version}.AppImage +rm -rf ~/.config/Jan ``` diff --git a/docs/src/pages/docs/desktop/mac.mdx b/docs/src/pages/docs/installation/mac.mdx similarity index 100% rename from docs/src/pages/docs/desktop/mac.mdx rename to docs/src/pages/docs/installation/mac.mdx diff --git a/docs/src/pages/docs/desktop/windows.mdx b/docs/src/pages/docs/installation/windows.mdx similarity index 64% rename from docs/src/pages/docs/desktop/windows.mdx rename to docs/src/pages/docs/installation/windows.mdx index 2213e3699..7cda7c8a3 100644 --- a/docs/src/pages/docs/desktop/windows.mdx +++ b/docs/src/pages/docs/installation/windows.mdx @@ -26,10 +26,13 @@ import { Settings } from 'lucide-react' # Windows Installation + ## Compatibility -System requirements: + +**System requirements:** - **Operating System**: Windows 10 or higher. - **CPU** + - Intel: Haswell (Q2 2013) or newer @@ -45,25 +48,25 @@ Processor must support **AVX2 or newer**. See [full list of supported processors Performance is significantly degraded on AVX-only processors. -- **Memory (RAM)** - - 8GB β†’ up to 3B parameter models (int4) - - 16GB β†’ up to 7B parameter models (int4) - - 32GB β†’ up to 13B parameter models (int4) +**Memory (RAM)** +- 8GB β†’ up to 3B parameter models (int4) +- 16GB β†’ up to 7B parameter models (int4) +- 32GB β†’ up to 13B parameter models (int4) Newer RAM generations provide better performance. -- **GPU**: - - 6GB β†’ up to 3B parameter models - - 8GB β†’ up to 7B parameter models - - 12GB β†’ up to 13B parameter models +**GPU**: +- 6GB β†’ up to 3B parameter models +- 8GB β†’ up to 7B parameter models +- 12GB β†’ up to 13B parameter models Minimum 6GB VRAM recommended for NVIDIA, AMD, or Intel Arc GPUs. -- **Storage:** 10GB free space minimum for app and models +**Storage:** 10GB free space minimum for app and models ## Install Jan @@ -151,8 +154,9 @@ nvidia-smi nvcc --version ``` ### Step 2: Enable GPU Acceleration -1. Navigate to **Settings** () > **Local Engine** > **Llama.cpp** -2. Select appropriate backend in **llama-cpp Backend**. See [our guide](/docs/local-engines/llama-cpp). + +Navigate to **Settings** () > **Hardware** > **GPUs** +and toggle the **ON** switch if not enabled. @@ -161,16 +165,15 @@ nvcc --version AMD GPUs require **Vulkan** support. -1. Navigate to **Settings** () > **Local Engine** > **Llama.cpp** -2. Select appropriate backend in **llama-cpp Backend**. See [our guide](/docs/local-engines/llama-cpp). - +Navigate to **Settings** () > **Hardware** > **GPUs** +and toggle the **ON** switch if not enabled. Intel Arc GPUs require **Vulkan** support. -1. 
Navigate to **Settings** () > **Local Engine** > **Llama.cpp** -2. Select appropriate backend in **llama-cpp Backend**. See [our guide](/docs/local-engines/llama-cpp). +Navigate to **Settings** () > **Hardware** > **GPUs** +and toggle the **ON** switch if not enabled. @@ -197,6 +200,7 @@ Remove app data: 2. Delete Jan folder or via **Terminal**: + ```sh cd C:\Users\%USERNAME%\AppData\Roaming rmdir /S Jan @@ -206,25 +210,3 @@ rmdir /S Jan Deleted data folders cannot be recovered. Backup important data first. - -{/* ## FAQs - - -Nightly Releases allow you to test new features and previews of upcoming stable releases. You can download -them from Jan's GitHub repository. However, remember that these builds might contain bugs and crash frequently. - - -Yes, you can move the Jan data folder. - - -Depending on your GPU type (NVIDIA, AMD, or Intel), follow the respective instructions provided in the -[GPU Acceleration](https://www.notion.so/docs/desktop/windows#gpu-acceleration) section above. - - -No, it cannot be restored once you delete the Jan data folder during uninstallation. - - - -If you have any trouble during installation, please see our [Troubleshooting](https://www.notion.so/docs/troubleshooting) -guide to resolve your problem. - */} diff --git a/docs/src/pages/docs/local-engines/llama-cpp.mdx b/docs/src/pages/docs/llama-cpp.mdx similarity index 98% rename from docs/src/pages/docs/local-engines/llama-cpp.mdx rename to docs/src/pages/docs/llama-cpp.mdx index df11c48ca..2b4d76738 100644 --- a/docs/src/pages/docs/local-engines/llama-cpp.mdx +++ b/docs/src/pages/docs/llama-cpp.mdx @@ -27,13 +27,13 @@ import { Tabs } from 'nextra/components' import { Callout, Steps } from 'nextra/components' import { Settings, EllipsisVertical, Plus, FolderOpen, Pencil } from 'lucide-react' -# llama.cpp (Cortex) +# Local Model Management ## Overview Jan uses **llama.cpp** for running local AI models. You can find its settings in **Settings** () > **Local Engine** > **llama.cpp**:
-![llama.cpp](../_assets/llama.cpp-01.png) +![llama.cpp](./_assets/llama.cpp-01.png)
These settings are for advanced users, you would want to check these settings when:
@@ -162,10 +162,3 @@ For detailed hardware compatibility, please visit our guide for [Mac](/docs/desk
 
 Performance impact varies by hardware, model size, and usage patterns.
 
-
-
-
-
-
-
-
diff --git a/docs/src/pages/docs/local-engines/_meta.json b/docs/src/pages/docs/local-engines/_meta.json
deleted file mode 100644
index 2e323af69..000000000
--- a/docs/src/pages/docs/local-engines/_meta.json
+++ /dev/null
@@ -1,6 +0,0 @@
-{
-  "llama-cpp": {
-    "title": "llama.cpp",
-    "href": "/docs/local-engines/llama-cpp"
-  }
-}
diff --git a/docs/src/pages/docs/manage-models.mdx b/docs/src/pages/docs/manage-models.mdx
new file mode 100644
index 000000000..7e92e0192
--- /dev/null
+++ b/docs/src/pages/docs/manage-models.mdx
@@ -0,0 +1,204 @@
+---
+title: Managing Models
+description: Manage your interaction with AI models locally.
+keywords:
+  [
+    Jan,
+    Customizable Intelligence, LLM,
+    local AI,
+    privacy focus,
+    free and open source,
+    private and offline,
+    conversational AI,
+    no-subscription fee,
+    large language models,
+    threads,
+    chat history,
+    thread history,
+  ]
+---
+import { Callout, Steps } from 'nextra/components'
+import { Settings, Trash2, EllipsisVertical, Plus, FolderOpen, Pencil } from 'lucide-react'
+
+
+# Model Management
+
+This guide shows you how to add, customize, and delete models within Jan.
+
+## Local Model
+
+Local models are managed through [Llama.cpp](https://github.com/ggerganov/llama.cpp), and these models are in a
+format called GGUF. When you run them locally, they will use your computer's memory (RAM) and processing power, so
+please make sure that you download models that match the hardware specifications for your operating system:
+- [Mac](/docs/desktop/mac#compatibility)
+- [Windows](/docs/desktop/windows#compatibility)
+- [Linux](/docs/desktop/linux#compatibility)
+
+### Adding Models
+
+#### 1. Download from Jan Hub (Recommended)
+
+The easiest way to get started is using Jan's built-in model hub (which is connected to [HuggingFace's Model Hub](https://huggingface.co/models)):
+1. Go to the **Hub** tab
+2. Browse available models and click on any model to see details about it
+3. Choose a model that fits your needs & hardware specifications
+4. Click **Download** on your chosen model
+
+
+Jan will indicate if a model might be **Slow on your device** or **Not enough RAM** based on your system specifications.
+
+![Download Model](./_assets/model-management-01.png) +
+
+#### 2. Import from [Hugging Face](https://huggingface.co/)
+
+You can download models with a direct link from Hugging Face:
+
+**Note:** Some models require a Hugging Face Access Token. Enter your token in **Settings > Model Providers > Hugging Face** before importing.
+
+1. Visit the [Hugging Face Models](https://huggingface.co/models) page.
+2. Find the model you want to use and make sure it is a GGUF file that fits your hardware.
+3. Copy the **model ID** (e.g., TheBloke/Mistral-7B-v0.1-GGUF)
+4. In Jan, paste the model ID into the **Search** bar on the **Hub** page
+5. Select your preferred quantized version to download (if the option is available)
+
+**Copy the model ID.** +![Find HF Model](./_assets/hf-unsloth.png) + +
+**Paste it in Jan's Hub Search Bar.** +![Import Model](./_assets/model-management-02.png) +
+
+#### 3. Import Local Files
+
+If you already have one or more GGUF model files on your computer:
+1. In Jan, go to **Settings > Model Providers > Llama.cpp**
+2. Click **Import** and select your GGUF file(s)
+3. Choose how you want to import:
+   - **Link Files:** Creates symbolic links to your model files (saves space)
+   - **Duplicate:** Makes a copy of model files in Jan's directory
+4. Click **Import** to complete (check the [Jan Data Folder](./data-folder) section for more info)
+
+
+You are responsible for your **model configurations**; use them at your own risk. Misconfigurations may result in lower
+quality or unexpected outputs. Learn about [model configurations here](./model-parameters).
+
+![Download Model](./_assets/model-management-04.png) +
+ +
+![Download Model](./_assets/model-import-04.png) +
+ +
+![Download Model](./_assets/model-import-05.png) +
+
+#### 4. Manual Setup
+
+For advanced users who want to add a specific model that is not available within the Jan **Hub**:
+
+
+##### Step 1: Create Model File
+1. Navigate to the [Jan Data Folder](./data-folder)
+2. Open the `models` folder
+3. Create a new **Folder** for your model
+4. Add your `model.gguf` file
+5. Add your `model.json` file with your configuration. Here's an example with "TinyLlama Chat 1.1B Q4":
+
+```json
+{
+  "sources": [
+    {
+      "filename": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
+      "url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
+    }
+  ],
+  "id": "tinyllama-1.1b",
+  "object": "model",
+  "name": "TinyLlama Chat 1.1B Q4",
+  "version": "1.0",
+  "description": "TinyLlama is a tiny model with only 1.1B. It's a good model for less powerful computers.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 4096,
+    "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>",
+    "llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
+  },
+  "parameters": {
+    "temperature": 0.7,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 2048,
+    "stop": [],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "TinyLlama",
+    "tags": [
+      "Tiny",
+      "Foundation Model"
+    ],
+    "size": 669000000
+  },
+  "engine": "nitro"
+}
+```
+##### Step 2: Modify Model Parameters
+
+Key fields to configure:
+1. The **Settings** array is where you can set the path or location of your model on your computer, the allowed context
+length, and the chat template expected by your model.
+2. The [**Parameters**](/docs/model-parameters) are the adjustable settings that affect how your model operates or
+processes the data. The fields in the parameters array are typically general and can be used across different
+models. Here is an example of model parameters:
+
+```json
+"parameters": {
+  "temperature": 0.7,
+  "top_p": 0.95,
+  "stream": true,
+  "max_tokens": 4096,
+  "frequency_penalty": 0,
+  "presence_penalty": 0
+}
+```
+
+
+### Delete Models
+1. Go to **Settings > Model Providers > Llama.cpp**
+2. Find the model you want to remove
+3. Select the three dots icon next to it and select **Delete Model**
+
+![Delete Model](./_assets/model-management-05.png) +
+
+
+## Cloud Models
+
+
+When using cloud models, be aware of any associated costs and rate limits from the providers. See the detailed guide for
+each cloud model provider [here](/docs/remote-models/anthropic).
+
+
+Jan supports connecting to various AI cloud providers that are OpenAI API-compatible, including OpenAI (GPT-4o, o3, ...),
+Anthropic (Claude), Groq, Mistral, and more.
+1. Navigate to **Settings** ()
+2. Under the **Model Providers** section in the left sidebar, choose your preferred provider (OpenAI, Anthropic, etc.)
+3. Enter your API key
+4. The activated cloud models will be available in your model selector inside the **Chat** panel
+
+![Download Model](./_assets/model-management-06.png) +
+As soon as you add your key for a model provider like Anthropic or OpenAI, you will be able to pick one of their models to chat with. +![Connect Remote APIs](./_assets/quick-start-03.png) +
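One way to sanity-check a provider key before pasting it into Jan is to list the provider's models over its OpenAI-compatible API. A minimal sketch, assuming an OpenAI key exported as `OPENAI_API_KEY`; for other OpenAI-compatible providers, substitute their base URL:

```bash
# List models to confirm the API key works before adding it to Jan.
# Uses OpenAI's endpoint as an example; OpenAI-compatible providers such as
# Groq or Mistral expose the same /v1/models route under their own base URL.
curl https://api.openai.com/v1/models \
  -H "Authorization: Bearer $OPENAI_API_KEY"
```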
diff --git a/docs/src/pages/cortex/assistants.mdx b/docs/src/pages/docs/mcp.mdx similarity index 66% rename from docs/src/pages/cortex/assistants.mdx rename to docs/src/pages/docs/mcp.mdx index 90e29c29a..8a9338459 100644 --- a/docs/src/pages/cortex/assistants.mdx +++ b/docs/src/pages/docs/mcp.mdx @@ -1,6 +1,6 @@ --- -title: Assistants -description: Assistants +title: Model Context Protocol +description: Manage your interaction with AI locally. keywords: [ Jan, @@ -12,11 +12,11 @@ keywords: conversational AI, no-subscription fee, large language models, - Cortex, - Jan, - LLMs + threads, + chat history, + thread history, ] --- - import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' \ No newline at end of file + +# MCP diff --git a/docs/src/pages/docs/models/model-parameters.mdx b/docs/src/pages/docs/model-parameters.mdx similarity index 95% rename from docs/src/pages/docs/models/model-parameters.mdx rename to docs/src/pages/docs/model-parameters.mdx index c4773fb30..4b3895c6c 100644 --- a/docs/src/pages/docs/models/model-parameters.mdx +++ b/docs/src/pages/docs/model-parameters.mdx @@ -17,7 +17,7 @@ keywords: thread history, ] --- -import { Callout, Steps } from 'nextra/components' +import { Callout, Steps } from 'nextra/components' # Model Parameters To customize model settings for a conversation or a model: @@ -51,11 +51,11 @@ These toggles are available when you click the edit button next to a model: - **Reasoning**: Enable advanced reasoning features
-![Download Model](../_assets/model-parameters.png) +![Download Model](./_assets/model-parameters.png)
### Model Parameters -This setting defines and configures the model's behavior. +This setting defines and configures the model's behavior. | Parameter | Description | |---------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | **Prompt Template** | A structured format that guides how the model should respond. Contains **placeholders** and **instructions** that help shape the model's output in a consistent way.| @@ -68,6 +68,5 @@ These settings parameters control how the model runs on your hardware. | **Context Length** | - Controls how much text the model can consider at once.

- Longer context allows the model to handle more input but uses more memory and runs slower.

- The maximum context length varies with the model used.

|
-By default, Jan defaults to the minimum between **8192** and the model's maximum context length, you can adjust this based on your needs.
+By default, Jan uses the smaller of **8192** and the model's maximum context length; you can adjust this based on your needs.
-
diff --git a/docs/src/pages/docs/models.mdx b/docs/src/pages/docs/models.mdx
deleted file mode 100644
index 76aa6c991..000000000
--- a/docs/src/pages/docs/models.mdx
+++ /dev/null
@@ -1,35 +0,0 @@
----
-title: Models
-description: Jan is a ChatGPT-alternative that runs on your computer, with a local API server.
-keywords:
-  [
-    Jan,
-    Customizable Intelligence, LLM,
-    local AI,
-    privacy focus,
-    free and open source,
-    private and offline,
-    conversational AI,
-    no-subscription fee,
-    large language models,
-    Hardware Setup,
-    GPU,
-  ]
----
-
-import { Cards, Card } from 'nextra/components'
-import childPages from './models/_meta.json';
-
-# Models
-
- - ( - - ))} -/> \ No newline at end of file diff --git a/docs/src/pages/docs/models/_meta.json b/docs/src/pages/docs/models/_meta.json deleted file mode 100644 index 6b33e308e..000000000 --- a/docs/src/pages/docs/models/_meta.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "manage-models": { - "title": "Model Management", - "href": "/docs/models/manage-models" - }, - "model-parameters": { - "title": "Model Parameters", - "href": "/docs/models/model-parameters" - } -} diff --git a/docs/src/pages/docs/models/manage-models.mdx b/docs/src/pages/docs/models/manage-models.mdx deleted file mode 100644 index 3a98d73a7..000000000 --- a/docs/src/pages/docs/models/manage-models.mdx +++ /dev/null @@ -1,239 +0,0 @@ ---- -title: Managing Models -description: Manage your interaction with AI locally. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - threads, - chat history, - thread history, - ] ---- -import { Callout, Steps } from 'nextra/components' -import { Settings, Trash2, EllipsisVertical, Plus, FolderOpen, Pencil } from 'lucide-react' - - -# Model Management -This guide provides comprehensive instructions on adding, customizing, and deleting models within Jan. - -## Local Model - -Jan offers flexible options for managing local models through its [Cortex](https://cortex.so/) engine. Currently, Jan only supports **GGUF format** models. - -Local models run directly on your computer, which means they use your computer's memory (RAM) and processing power. Please choose models carefully based on your hardware specifications for [Mac](/docs/desktop/mac#compatibility), [Windows](/docs/desktop/windows#compatibility), & [Linux](/docs/desktop/linux#compatibility). - - -### Add Models - -#### 1. Download from Jan Hub (Recommended) -The easiest way to get started is using Jan's built-in model hub: -1. Go to **Hub** -2. Browse available models and click on any model to see details about it -3. Choose a model that fits your needs & hardware specifications -4. Click **Download** on your chosen model - - -Jan will indicate if a model might be **Slow on your device** or **Not enough RAM** based on your system specifications. - - -
-![Download Model](../_assets/model-management-01.png) -
- -#### 2. Import from [Hugging Face](https://huggingface.co/) -You can import GGUF models directly from Hugging Face: - -**Note:** Some models require a Hugging Face Access Token. Enter your token in **Settings > Model Providers > Hugging Face** before importing. - -##### Option A: Import in Jan -1. Visit [Hugging Face Models](https://huggingface.co/models). -2. Find a GGUF model you want to use -3. Copy the **model ID** (e.g., TheBloke/Mistral-7B-v0.1-GGUF) or its **URL** -4. In Jan, paste the model ID/URL to the **Search** bar in **Hub** -5. Select your preferred quantized version to download - -
-![Import Model](../_assets/model-management-02.png) -
- -##### Option B: Use Deep Link -You can use Jan's deep link feature to quickly import models: -1. Visit [Hugging Face Models](https://huggingface.co/models). -2. Find the GGUF model you want to use -3. Copy the **model ID**, for example: `bartowski/Llama-3.2-3B-Instruct-GGUF` -4. Create a **deep link URL** in this format: -``` -jan://models/huggingface/ -``` -5. Enter the URL in your browser & **Enter**, for example: -``` -jan://models/huggingface/bartowski/Llama-3.2-3B-Instruct-GGUF -``` -6. A prompt will appear: `This site is trying to open Jan`, click **Open** to open Jan app. -7. Select your preferred quantized version to download - - -Deep linking won't work for models requiring API tokens or usage agreements. You'll need to download these models manually through the Hugging Face website. - - -
-![Deep Link](../_assets/model-management-03.png) -
- - - -#### 3. Import Local Files -If you already have GGUF model files on your computer: -1. In Jan, go to **Settings > Model Providers > Llama.cpp** -2. Click **Import** and select your GGUF file(s) -3. Choose how you want to import: - - **Link Files:** Creates symbolic links to your model files (saves space) - - **Duplicate:** Makes a copy of model files in Jan's directory -4. Click **Import** to complete - - -You need to own your **model configurations**, use at your own risk. Misconfigurations may result in lower quality or unexpected outputs. - - -
-![Download Model](../_assets/model-management-04.png) -
- -#### 4. Manual Setup -For advanced users who add a specific model that is not available within Jan **Hub**: - -##### Step 1: Create Model File -1. Navigate to [Jan Data Folder]() -2. Open `models` folder -3. Create a new **Folder** for your model -4. Add a `model.json` file with your configuration: -```bash - "id": "", - "object": "", - "name": "", - "version": "", - "description": "", - "format": "", - "settings": "", - "parameters": { - "max_tokens": "", - "temperature": "" - }, - "metadata": { - "author": "", - "tags": [""] - }, - "engine": "", - "source": "" -``` -Here's model "TinyLlama Chat 1.1B Q4" as an example: -```json -{ - "sources": [ - { - "filename": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf", - "url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf" - } - ], - "id": "tinyllama-1.1b", - "object": "model", - "name": "TinyLlama Chat 1.1B Q4", - "version": "1.0", - "description": "TinyLlama is a tiny model with only 1.1B. It's a good model for less powerful computers.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>", - "llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 2048, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "TinyLlama", - "tags": [ - "Tiny", - "Foundation Model" - ], - "size": 669000000 - }, - "engine": "nitro" -} -``` -##### Step 2: Modify Model Parameters -Modify model parameters under the settings array. Key fields to configure: -1. **Settings** is where you can set your engine configurations. -2. [**Parameters**](/docs/models/model-parameters) are the adjustable settings that affect how your model operates or processes the data. The fields in parameters are typically general and can be the same across models. Here is an example of model parameters: -``` -"parameters":{ - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "frequency_penalty": 0, - "presence_penalty": 0, -} -``` - - -### Delete Models -1. Go to **Settings > Model Providers > Llama.cpp** -2. Find the model you want to remove -3. Select the three dots icon next to it and select **Delete Model** - -
-![Delete Model](../_assets/model-management-05.png) -
- - -## Cloud model - -When using cloud models, be aware of any associated costs and rate limits from the providers. See detailed guide for each cloud model provider [here](/docs/remote-models/anthropic). - - -Jan supports connecting to various AI cloud providers that are OpenAI API-compatible, including: OpenAI (GPT-4, o1,...), Anthropic (Claude), Groq, Mistral, and more. -1. Navigate to **Settings** () -2. Under **Remote Engine** section in the left sidebar, choose your preferred engines (OpenAI, Anthropic, etc.) -3. Enter your API key -4. The activated cloud models will be available in your model selector in **Threads** - -
-![Download Model](../_assets/model-management-06.png) -
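"OpenAI API-compatible" means these providers all speak the same chat-completions protocol, which is why Jan can drive any of them once you supply a key. As a rough illustration of the kind of request involved (not Jan's exact implementation; this example targets OpenAI, and other providers swap the base URL and model ID):

```bash
curl https://api.openai.com/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $OPENAI_API_KEY" \
  -d '{
    "model": "gpt-4o",
    "messages": [{"role": "user", "content": "Hello from Jan!"}]
  }'
```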
You can manage and hide individual provider engines under **Settings** > **Engines**:
-![Download Model](../_assets/model-management-07.png) -

### Add Models
1. On your provider's settings page, click **Add Model**
2. Enter the **model ID** exactly as it appears in the provider's model list, for example `claude-3-opus-latest`
3. A success message will appear when the model is added

-![Add Model](../_assets/model-management-08.png) -
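If you're unsure of the exact ID, most providers expose a model-listing endpoint you can query directly. For example, with OpenAI (other providers offer similar endpoints; check their docs):

```bash
# Lists every model your API key can access, including exact IDs
curl https://api.openai.com/v1/models \
  -H "Authorization: Bearer $OPENAI_API_KEY"
```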

### Delete Models
1. On your provider's settings page, find the model you want to delete
2. Click the **Trash Bin** () icon
3. The model will be removed from your available models list

-![Delete Model](../_assets/model-management-09.png) -
\ No newline at end of file diff --git a/docs/src/pages/docs/quickstart.mdx b/docs/src/pages/docs/quickstart.mdx index 07a691d1f..92473bcbf 100644 --- a/docs/src/pages/docs/quickstart.mdx +++ b/docs/src/pages/docs/quickstart.mdx @@ -1,6 +1,6 @@ --- title: Installation -description: Get started quickly with Jan, a ChatGPT-alternative that runs on your own computer, with a local API server. Learn how to install Jan and select an AI model to start chatting. +description: Jan is an open-source AI assistant and self-hosted AI platform - build and run AI on your own desktop or server. sidebar_position: 2 keywords: [ @@ -20,64 +20,78 @@ keywords: ] --- -import { Tabs } from 'nextra/components' import { Callout, Steps } from 'nextra/components' import { Settings } from 'lucide-react' # Quickstart + ### Step 1: Install Jan + 1. [Download Jan](/download) -2. Install the application on your system ([Mac](/docs/desktop/mac), [Windows](/docs/desktop/windows), [Linux](/docs/desktop/linux)) +2. Install the app on your system ([Mac](/docs/desktop/mac), [Windows](/docs/desktop/windows), [Linux](/docs/desktop/linux)) 3. Launch Jan -Once installed, you'll see Jan application interface with no models pre-installed yet. You'll be able to: +Once installed, you'll see Jan's interface with no pre-installed models. You can: - Download and run local AI models -- Connect to cloud AI providers if desired +- Connect to cloud-based AI model providers if desired
-![Default State](./_assets/quick-start-01.png) - +![Default State](./_assets/jan_ui.png)
### Step 2: Download a Model
-Jan offers various local AI models, from smaller efficient models to larger more capable ones:
-1. Go to **Hub**
-2. Browse available models and click on any model to see details about it
-3. Choose a model that fits your needs & hardware specifications
-4. Click **Download** to begin
-
-Local models run directly on your computer, which means they use your computer's memory (RAM) and processing power. Please choose models carefully based on your hardware specifications ([Mac](/docs/desktop/mac#minimum-requirements), [Windows](/docs/desktop/windows#compatibility), [Linux](/docs/desktop/linux#compatibility)).
-
-**Note:** Some models from Hugging Face require an access token. Enter your token in **Settings > Model Providers > Llama.cpp > Hugging Face Access Token** before importing.
-
-For more model installation methods, please visit [Model Management](/docs/models/manage-models).
+Jan offers various local AI models, from nimble lightweights to hefty powerhouses:
+1. Go to the **Hub Tab**, or browse [HuggingFace](https://huggingface.co/models) where you will find
+even more models alongside their details. (Tip: you can copy a model's URL or name from Hugging Face,
+paste it into the Hub Tab, and download it there.)
+2. Browse models and tap any for details (models must be in GGUF format)
+3. Select one that matches your needs & hardware specs
+4. Hit **Download** to begin (a progress bar will appear for the duration of the download)
![Download a Model](./_assets/model-management-01.png)
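Prefer grabbing a GGUF file yourself? One option is the Hugging Face CLI, after which you can import the file into Jan (the repository and filename below are just examples; any GGUF model works):

```bash
pip install -U "huggingface_hub[cli]"
# Download a single GGUF file into ./models
huggingface-cli download TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF \
  tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf --local-dir ./models
```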
+ +Local models consume your computer's memory and processing power. Choose carefully based on your hardware +specifications ([Mac](/docs/desktop/mac#minimum-requirements), [Windows](/docs/desktop/windows#compatibility), +[Linux](/docs/desktop/linux#compatibility)). + + +**Note:** Some Hugging Face models require an access token. Enter yours +in **Settings > Model Providers > Llama.cpp > Hugging Face Access Token** before importing. + +
+![Add HF Token](./_assets/hf_token.png) +
+
+
+For alternative installation methods, see the [Model Management](/docs/manage-models) section.
+
+
+
+
 ### Step 3: Turn on GPU Acceleration (Optional)
-While the model downloads, let's optimize your hardware setup. If you're on **Windows** or **Linux** and have a
-compatible graphics card, you can significantly boost model performance by enabling GPU acceleration.
-1. Navigate to **() Settings** > **Model Providers** > **Llama.cpp**
-2. At **llama-cpp Backend**, select backend. For example `windows-amd64-vulkan` if you have an AMD gaphic card. For
-more info, see [our guide](/docs/local-engines/llama-cpp).
+
+While your model downloads, let's supercharge your setup. On **Windows** or **Linux** with
+a compatible graphics card, you can dramatically boost performance with GPU acceleration.
+1. Head to **() Settings** > **Hardware**
+2. Under **GPUs**, toggle the setting to ON if not already enabled.
+
+![Turn on GPU acceleration](./_assets/gpu_accl.png)

-Ensure you have installed all required dependencies and drivers before enabling GPU acceleration. See **GPU Setup Guide**
-on [Windows](/docs/desktop/windows#gpu-acceleration) & [Linux](/docs/desktop/linux#gpu-acceleration) for detailed instructions.
+Install all required dependencies and drivers before enabling GPU acceleration. Check the **GPU Setup Guide** for [Windows](/docs/desktop/windows#gpu-acceleration) & [Linux](/docs/desktop/linux#gpu-acceleration).
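Not sure whether your drivers are in place? A quick terminal check before toggling the setting (the commands assume an NVIDIA card or the Vulkan tools respectively; skip whichever doesn't apply):

```bash
# NVIDIA: should print your GPU, driver version, and CUDA version
nvidia-smi

# Vulkan (e.g., AMD or Intel GPUs): should list your GPU
vulkaninfo --summary
```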
-![Turn on GPU acceleration](./_assets/trouble-shooting-03.png)
-
 ### Step 4: Customize Assistant Instructions
-Once your model has been downloaded and you're ready to start your first conversation, you can customize how the model
-should respond by modifying specific instructions or model configurations in [Assistant.](/docs/assistants)
+
+With your model ready to roll, you can tailor how it responds by tweaking instructions or model configurations
+in [Assistants](/docs/assistants).
@@ -85,15 +99,24 @@ should respond by modifying specific instructions or model configurations in [As
-### Step 5: Start Chatting and Fine-tune Settings
+You can also go to the **Assistant** tab to manage all of your personalized instructions. The cool thing is
+that you can use them no matter which model you choose.

-Now that your model is downloaded and instructions are set, you can begin chatting with it. Type your message in
-the **input field** at the bottom of the thread to start the conversation.
+
-You can further customize your experience by: -- Adjusting the [model parameters](/docs/models/model-parameters) in the **Model Configurations** by clicking on the **Gear icon** next to the selected model or in the **Assistant Settings** -- Try different models for different tasks by clicking the **model selector** in **Model** tab or **input field** -- [Create new threads](/docs/threads#creating-new-thread) with different instructions and model configurations +![Add an Assistant Instruction](./_assets/add_assistant.png) + +
+ +### Step 5: Start Chatting and Update the Settings + +Model downloaded? Instructions set? Time to chat. Type your message in the **input field** at +the bottom to kickstart the conversation. + +Fine-tune your experience by: +- Tweaking [model parameters](/docs/model-parameters) via the **Gear icon** next to your model or in **Assistant Settings** +- Switching models for different tasks through the **model selector** in **Model** tab or **input field** +- [Creating new threads](/docs/threads#creating-new-thread) with custom instructions and configurations
@@ -104,23 +127,17 @@ You can further customize your experience by: ### Step 6: Connect to cloud models (Optional) -Jan supports both open source and cloud-based models. You can connect to cloud model providers that are including: OpenAI -(GPT-4o, o1,...), Anthropic (Claude), Groq, Mistral, and more. +Jan plays nice with both open source and cloud models. Connect to OpenAI (GPT-4o, o1), Anthropic (Claude), Groq, Mistral, and others: 1. Open any **Thread** -2. Select a model from the **model selector** dropdown in input field -3. Select your preferred provider (Anthropic, OpenAI, etc.), click **Gear icon** next to the provider -4. Obtain a valid API key from your chosen provider, ensure the key has sufficient credits & appropriate permissions -5. Copy & insert your **API Key** in Jan +2. Select a model from the **model selector** dropdown +3. Pick your provider, click the **Gear icon** beside it +4. Grab a valid API key from your chosen provider (make sure it has sufficient credits) +5. Paste your **API Key** into Jan -See [Remote APIs](/docs/remote-models/openai) for detailed configuration. +For detailed setup, check [Remote APIs](/docs/remote-models/openai).
![Connect Remote APIs](./_assets/quick-start-03.png)
- -## What's Next? -Now that Jan is up and running, explore further: -1. Learn how to download and manage your [models](/docs/models). -2. Customize Jan's [application settings](/docs/settings) according to your preferences. diff --git a/docs/src/pages/docs/remote-models/_meta.json b/docs/src/pages/docs/remote-models/_meta.json index 62709526e..39660be88 100644 --- a/docs/src/pages/docs/remote-models/_meta.json +++ b/docs/src/pages/docs/remote-models/_meta.json @@ -7,41 +7,24 @@ "title": "Cohere", "href": "/docs/remote-models/cohere" }, - "deepseek": { - "title": "DeepSeek", - "href": "/docs/remote-models/deepseek" - }, - "google": { - "title": "Google", + "google": { + "title": "Gemini", "href": "/docs/remote-models/google" }, - "groq": { - "title": "Groq", + "groq": { + "title": "Groq", "href": "/docs/remote-models/groq" }, - "martian": { - "title": "Martian", - "href": "/docs/remote-models/martian" - }, "mistralai": { "title": "Mistral AI", "href": "/docs/remote-models/mistralai" }, - "nvidia-nim": { - "title": "Nvidia", - "href": "/docs/remote-models/nvidia-nim" - }, "openai": { "title": "OpenAI", "href": "/docs/remote-models/openai" }, - "openrouter": { - "title": "OpenRouter", + "openrouter": { + "title": "OpenRouter", "href": "/docs/remote-models/openrouter" - }, - "triton": { - "title": "Triton-TRT-LLM", - "href": "/docs/remote-models/triton", - "display": "hidden" } -} \ No newline at end of file +} diff --git a/docs/src/pages/docs/remote-models/anthropic.mdx b/docs/src/pages/docs/remote-models/anthropic.mdx index cf83782b9..67eebcb9c 100644 --- a/docs/src/pages/docs/remote-models/anthropic.mdx +++ b/docs/src/pages/docs/remote-models/anthropic.mdx @@ -20,13 +20,15 @@ import { Settings, Plus } from 'lucide-react' # Anthropic -Jan supports [Anthropic](https://anthropic.com/) API integration, allowing you to use Claude models (Claude 3, Claude 2.1, and more) through Jan's interface. +Jan supports all of [Anthropic's models](https://anthropic.com/) via API integration, allowing +you to chat with Claude's latest Opus, Sonnet and Haiku models. ## Integrate Anthropic API with Jan -### Step 1: Get Your API Key -1. Visit [Anthropic Console](https://console.anthropic.com/settings/keys) and sign in + +### Step 1: Get Your API Key +1. Visit [Anthropic Console](https://console.anthropic.com/settings/keys) and sign in 2. Create & copy a new API key or copy your existing one @@ -34,35 +36,29 @@ Ensure your API key has sufficient credits ### Step 2: Configure Jan -There are two ways to add your Anthropic API keys in Jan: -**Through Threads:** -1. In Threads, click **Model** tab in the **right sidebar** or **model selector** in input field -2. Once the selector is poped up, choose the **Cloud** tab -3. Click **Add** () icon next to **Anthropic** -4. Once you are directed to Anthropic settings, insert your **API Key** - -**Through Settings:** 1. Navigate to **Settings** () -2. Under **Remote Engines**, select **Anthropic** -3. Insert your **API Key** +2. Under **Model Providers**, select **Anthropic** +3. Insert your **API Key**
-![Anthropic](../_assets/anthropic.png) +![Anthropic](../_assets/model-management-06.png)
### Step 3: Start Using Anthropic's Models

-1. In any existing **Threads** or create a new one
+1. Open any existing **Chat** or create a new one
 2. Select an Anthropic model from **model selector**
 3. Start chatting
+
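If Jan reports an authentication error, it can help to verify the key outside Jan first. A minimal check against Anthropic's Messages API (the model ID is just an example; any model your key can access works):

```bash
curl https://api.anthropic.com/v1/messages \
  -H "content-type: application/json" \
  -H "x-api-key: $ANTHROPIC_API_KEY" \
  -H "anthropic-version: 2023-06-01" \
  -d '{
    "model": "claude-3-5-haiku-20241022",
    "max_tokens": 128,
    "messages": [{"role": "user", "content": "Hello!"}]
  }'
```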
## Available Anthropic Models

-Jan automatically includes Anthropic's available models. In case you want to use a specific Anthropic model that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/models/manage-models#add-models-1):
 - See list of available models in [Anthropic Models](https://docs.anthropic.com/claude/docs/models-overview).
 - The `id` property must match the model name in the list. For example, `claude-3-opus-20240229`, `claude-3-sonnet-20240229`, or `claude-2.1`.
+Jan automatically includes Anthropic's available models. In case you want to use a specific Anthropic model
+that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/manage-models#add-models-1):
+- See list of available models in [Anthropic Models](https://docs.anthropic.com/claude/docs/models-overview).
+- The `id` property must match the model name in the list. For example, `claude-opus-4@20250514`, `claude-sonnet-4@20250514`, or `claude-3-5-haiku@20241022`.

## Troubleshooting

@@ -83,4 +79,5 @@ Common issues and solutions:
- Check if you're using the correct model ID
- Verify your Anthropic account has the necessary permissions

-Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH) or check the [Anthropic documentation](https://docs.anthropic.com/claude/docs). \ No newline at end of file
+Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH) or check the
+[Anthropic documentation](https://docs.anthropic.com/claude/docs).
diff --git a/docs/src/pages/docs/remote-models/cohere.mdx b/docs/src/pages/docs/remote-models/cohere.mdx
index c53e7e84b..af9098480 100644
--- a/docs/src/pages/docs/remote-models/cohere.mdx
+++ b/docs/src/pages/docs/remote-models/cohere.mdx
@@ -20,32 +20,26 @@ import { Settings, Plus } from 'lucide-react'

# Cohere

-Jan supports [Cohere](https://cohere.com/) API integration, allowing you to use Cohere's models (Command, Command-R and more) through Jan's interface.
+Jan supports [Cohere](https://cohere.com/) API integration, allowing you to use Cohere's
+models (Command, Command-R and more) through Jan's interface.

## Integrate Cohere API with Jan

-
-### Step 1: Get Your API Key
-1. Visit [Cohere Dashboard](https://dashboard.cohere.com/api-keys) and sign in
-2. Create & copy a new API key or copy your existing one
+
+### Step 1: Get Your API Key
+1. Visit [Cohere Dashboard](https://dashboard.cohere.com/api-keys) and sign in
+2. Create a new API key and/or copy your existing one

-Ensure your API key has sufficient credits
+Ensure your API key has sufficient credits.

### Step 2: Configure Jan
-There are two ways to add your Cohere API keys in Jan:

-**Through Threads:**
-1. In Threads, click **Model** tab in the **right sidebar** or **model selector** in input field
-2. Once the selector is poped up, choose the **Cloud** tab
-3. Click **Add** () icon next to **Cohere**
-4. Once you are directed to Cohere settings, insert your **API Key**
-
-**Through Settings:**
1. Navigate to **Settings** ()
-2. Under **Remote Engines**, select **Cohere**
-3. Insert your **API Key**
+2. Under **Model Providers**, select **Cohere**
+3. Insert your **API Key**
![Cohere](../_assets/cohere.png) @@ -53,16 +47,17 @@ There are two ways to add your Cohere API keys in Jan: ### Step 3: Start Using Cohere's Models -1. In any existing **Threads** or create a new one -2. Select a Cohere model from **model selector** +1. Jump into any existing **Chat** or create a new one +2. Select a Cohere model from **model selector** options 3. Start chatting
## Available Cohere Models -Jan automatically includes Cohere's available models. In case you want to use a specific Cohere model that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/models/manage-models#add-models-1): - - See list of available models in [Cohere Documentation](https://docs.cohere.com/v2/docs/models). - - The `id` property must match the model name in the list. For example, `command-nightly` or `command-light`. +Jan automatically includes Cohere's available models. In case you want to use a specific +Cohere model that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/manage-models): +- See list of available models in [Cohere Documentation](https://docs.cohere.com/v2/docs/models). +- The `id` property must match the model name in the list. For example, `command-nightly` or `command-light`. ## Troubleshooting @@ -83,4 +78,4 @@ Common issues and solutions: - Check if you're using the correct model ID - Verify your Cohere account has the necessary permissions -Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH) or check the [Cohere documentation](https://docs.cohere.com). \ No newline at end of file +Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH) or check the [Cohere documentation](https://docs.cohere.com). diff --git a/docs/src/pages/docs/remote-models/deepseek.mdx b/docs/src/pages/docs/remote-models/deepseek.mdx deleted file mode 100644 index 67b596846..000000000 --- a/docs/src/pages/docs/remote-models/deepseek.mdx +++ /dev/null @@ -1,86 +0,0 @@ ---- -title: DeepSeek -description: Learn how to integrate DeepSeek with Jan for enhanced functionality. -keywords: - [ - Anthropic API, - Jan, - Jan AI, - ChatGPT alternative, - conversational AI, - large language model, - integration, - Anthropic integration, - API integration - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Settings, Plus } from 'lucide-react' - -# DeepSeek - -Jan supports [DeepSeek](https://www.deepseek.com/) API integration, allowing you to use DeepSeek models through Jan's interface. - -## Integrate DeepSeek API with Jan - - -### Step 1: Get Your API Key -1. Visit [DeepSeek Console](https://platform.deepseek.com/) and sign in -2. Create & copy a new API key or copy your existing one - - -Ensure your API key has sufficient credits - - -### Step 2: Configure Jan -There are two ways to add your DeepSeek API keys in Jan: - -**Through Threads:** -1. In Threads, click **Model** tab in the **right sidebar** or **model selector** in input field -2. Once the selector is poped up, choose the **Cloud** tab -3. Click **Add** () icon next to **DeepSeek** -4. Once you are directed to DeepSeek settings, insert your **API Key** - -**Through Settings:** -1. Navigate to **Settings** () -2. Under **Remote Engines**, select **DeepSeek** -3. Insert your **API Key** - -
-![DeepSeek](../_assets/deepseek.png) -
- -### Step 3: Start Using DeepSeek's Models - -1. In any existing **Threads** or create a new one -2. Select an DeepSeek model from **model selector** -3. Start chatting -
- -## Available DeepSeek Models - -Jan automatically includes DeepSeek's available models. In case you want to use a specific DeepSeek model that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/models/manage-models#add-models-1): - - See list of available models in [DeepSeek Models](https://api-docs.deepseek.com/quick_start/pricing). - - The `id` property must match the model name in the list. For example, `deepseek-chat` or `deepseek-reasoner`. - -## Troubleshooting - -Common issues and solutions: - -**1. API Key Issues** -- Verify your API key is correct and not expired -- Check if you have billing set up on your DeepSeek account -- Ensure you have access to the model you're trying to use - -**2. Connection Problems** -- Check your internet connection -- Verify [DeepSeek's system status](https://status.deepseek.com/) -- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs) - -**3. Model Unavailable** -- Confirm your API key has access to the model -- Check if you're using the correct model ID -- Verify your DeepSeek account has the necessary permissions - -Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH). \ No newline at end of file diff --git a/docs/src/pages/docs/remote-models/google.mdx b/docs/src/pages/docs/remote-models/google.mdx index e9dd6e716..d29f1290b 100644 --- a/docs/src/pages/docs/remote-models/google.mdx +++ b/docs/src/pages/docs/remote-models/google.mdx @@ -25,8 +25,8 @@ Jan supports [Google](https://ai.google/get-started/our-models/) API integration ## Integrate Google API with Jan -### Step 1: Get Your API Key -1. Visit [Google AI Studio](https://aistudio.google.com/app/apikey) and sign in +### Step 1: Get Your API Key +1. Visit [Google AI Studio](https://aistudio.google.com/app/apikey) and sign in 2. Create & copy a new API key or copy your existing one @@ -34,18 +34,10 @@ Ensure your API key has sufficient credits ### Step 2: Configure Jan -There are two ways to add your Google API keys in Jan: -**Through Threads:** -1. In Threads, click **Model** tab in the **right sidebar** or **model selector** in input field -2. Once the selector is poped up, choose the **Cloud** tab -3. Click **Add** () icon next to **Google** -4. Once you are directed to Google settings, insert your **API Key** - -**Through Settings:** -1. Navigate to **Settings** () -2. Under **Remote Engines**, select **Google** -3. Insert your **API Key** +1. Navigate to the **Settings** page () +2. Under **Model Providers**, select **Gemini** +3. Insert your **API Key**
![Google](../_assets/google.png) @@ -53,16 +45,17 @@ There are two ways to add your Google API keys in Jan: ### Step 3: Start Using Google's Models -1. In any existing **Threads** or create a new one -2. Select an Google model from **model selector** +1. Got to any existing **Chat** or create a new one +2. Select an Gemini model from **model selector** 3. Start chatting
## Available Google Models -Jan automatically includes Google's available models like Gemini series. In case you want to use a specific Google model that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/models/manage-models#add-models-1): - - See list of available models in [Google Models](https://ai.google.dev/gemini-api/docs/models/gemini). - - The `id` property must match the model name in the list. For example, `gemini-1.5-pro` or `gemini-2.0-flash-lite-preview`. +Jan automatically includes Google's available models like Gemini series. In case you want to use a specific +Gemini model that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/manage-models#add-models-1): +- See list of available models in [Google Models](https://ai.google.dev/gemini-api/docs/models/gemini). +- The `id` property must match the model name in the list. For example, `gemini-1.5-pro` or `gemini-2.0-flash-lite-preview`. ## Troubleshooting @@ -83,4 +76,4 @@ Common issues and solutions: - Check if you're using the correct model ID - Verify your Google account has the necessary permissions -Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH). \ No newline at end of file +Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH). diff --git a/docs/src/pages/docs/remote-models/groq.mdx b/docs/src/pages/docs/remote-models/groq.mdx index 327e4a6b7..7db6a97b2 100644 --- a/docs/src/pages/docs/remote-models/groq.mdx +++ b/docs/src/pages/docs/remote-models/groq.mdx @@ -34,17 +34,9 @@ Ensure your API key has sufficient credits ### Step 2: Configure Jan -There are two ways to add your Groq API keys in Jan: -**Through Threads:** -1. In Threads, click **Model** tab in the **right sidebar** or **model selector** in input field -2. Once the selector is poped up, choose the **Cloud** tab -3. Click **Add** () icon next to **Groq** -4. Once you are directed to Groq settings, insert your **API Key** - -**Through Settings:** -1. Navigate to **Settings** () -2. Under **Remote Engines**, select **Groq** +1. Navigate to the **Settings** () +2. Under **Model Providers**, select **Groq** 3. Insert your **API Key**
@@ -54,16 +46,17 @@ There are two ways to add your Groq API keys in Jan: ### Step 3: Start Using Groq's Models -1. In any existing **Threads** or create a new one +1. Jump into any existing **Chat** or create a new one 2. Select a Groq model from **model selector** 3. Start chatting ## Available Models Through Groq -Jan automatically includes Groq's available models. In case you want to use a specific Groq model that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/models/manage-models#add-models-1): - - See list of available models in [Groq Documentation](https://console.groq.com/docs/models). - - The `id` property must match the model name in the list. For example, if you want to use Llama3.3 70B, you must set the `id` property to `llama-3.3-70b-versatile`. +Jan automatically includes Groq's available models. In case you want to use a specific Groq model that +you cannot find in **Jan**, follow the instructions in the [Add Cloud Models](/docs/manage-models#add-models-1): +- See list of available models in [Groq Documentation](https://console.groq.com/docs/models). +- The `id` property must match the model name in the list. For example, if you want to use Llama3.3 70B, you must set the `id` property to `llama-3.3-70b-versatile`. ## Troubleshooting @@ -84,4 +77,4 @@ Common issues and solutions: - Check if you're using the correct model ID - Verify your Groq account has the necessary permissions -Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH) or check the [Groq documentation](https://console.groq.com/docs). \ No newline at end of file +Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH) or check the [Groq documentation](https://console.groq.com/docs). diff --git a/docs/src/pages/docs/remote-models/martian.mdx b/docs/src/pages/docs/remote-models/martian.mdx deleted file mode 100644 index 9b494419c..000000000 --- a/docs/src/pages/docs/remote-models/martian.mdx +++ /dev/null @@ -1,84 +0,0 @@ ---- -title: Martian -description: Learn how to integrate Martian with Jan for enhanced functionality. -keywords: - [ - Martian API, - Jan, - Jan AI, - ChatGPT alternative, - conversational AI, - large language model, - integration, - Martian integration, - API integration - ] ---- -import { Callout, Steps } from 'nextra/components' -import { Settings, Plus } from 'lucide-react' - -# Martian - -Jan supports [Martian](https://withmartian.com/) API integration, allowing you to use Martian's models through Jan's interface. - -## Integrate Martian with Jan - - -### Step 1: Get Your API Key -1. Visit [Martian API Keys](https://www.withmartian.com/dashboard/undefined/api-keys) and sign in -2. Create & copy a new API key or copy your existing one - - -Ensure your API key has sufficient credits - - -### Step 2: Configure Jan -There are two ways to add your Martian key in Jan: - -**Through Threads:** -1. In Threads, click **Model** tab in the **right sidebar** or **model selector** in input field -2. Once the selector is poped up, choose the **Cloud** tab -3. Click **Add** () icon next to **Martian** -4. Once you are directed to Martian settings, insert your **API Key** - -**Through Settings:** -1. Navigate to **Settings** () -2. Under **Remote Engines**, select **Martian** -3. Insert your **API Key** - -
-![Martian](../_assets/martian.png) -
- -### Step 3: Start Using Martian Models - -1. In any existing **Threads** or create a new one -2. Select a Martian model from **model selector** -3. Start chatting -
- -## Available Models - -Jan includes the Martian Model Router which automatically selects the best model for your use case. You can start using it right away after configuring your API key. See list of available models in [Martian Documentation](https://docs.withmartian.com/martian-model-router/getting-started/supported-models-gateway). - - -## Troubleshooting - -Common issues and solutions: - -**1. API Key Issues** -- Verify your API key is correct and not expired -- Check if you have billing set up on your Martian account -- Ensure you have access to the model you're trying to use - -**2. Connection Problems** -- Check your internet connection -- Verify Martian's system status -- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs) - -**3. Model Unavailable** -- Confirm your API key has access to the model -- Check if you're using the correct model ID -- Verify your Martian account has the necessary permissions - -Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH) or check the [Martian documentation](https://docs.withmartian.com/martian-model-router). \ No newline at end of file diff --git a/docs/src/pages/docs/remote-models/mistralai.mdx b/docs/src/pages/docs/remote-models/mistralai.mdx index 68efe5073..ea403a701 100644 --- a/docs/src/pages/docs/remote-models/mistralai.mdx +++ b/docs/src/pages/docs/remote-models/mistralai.mdx @@ -21,13 +21,15 @@ import { Settings, Plus } from 'lucide-react' # Mistral AI -Jan supports [Mistral AI](https://mistral.ai/) API integration, allowing you to use Mistral's powerful models (Mistral Large, Mistral Medium, Mistral Small and more) through Jan's interface. +Jan supports all models available via the [Mistral AI](https://mistral.ai/) API, allowing you to use Mistral's +powerful models (Mistral Large, Mistral Medium, Mistral Small and more) through Jan's interface. ## Integrate Mistral AI with Jan + ### Step 1: Get Your API Key -1. Visit [Mistral AI Platform](https://console.mistral.ai/api-keys/) and sign in +1. Visit the [Mistral AI Platform](https://console.mistral.ai/api-keys/) and sign in 2. Create & copy a new API key or copy your existing one @@ -35,17 +37,9 @@ Ensure your API key has sufficient credits ### Step 2: Configure Jan -There are two ways to add your Mistral AI keys in Jan: -**Through Threads:** -1. In Threads, click **Model** tab in the **right sidebar** or **model selector** in input field -2. Once the selector is poped up, choose the **Cloud** tab -3. Click **Add** () icon next to **Mistral AI** -4. Once you are directed to Mistral AI settings, insert your **API Key** - -**Through Settings:** -1. Navigate to **Settings** () -2. Under **Remote Engines**, select **Mistral AI** +1. Navigate to the **Settings** page () +2. Under **Model Providers**, select **Mistral AI** 3. Insert your **API Key**
@@ -54,16 +48,18 @@ There are two ways to add your Mistral AI keys in Jan: ### Step 3: Start Using Mistral's Models -1. In any existing **Threads** or create a new one +1. Open any existing **Chat** or create a new one 2. Select a Mistral model from **model selector** 3. Start chatting
## Available Mistral Models -Jan automatically includes Mistral's available models. In case you want to use a specific Mistral model that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/models/manage-models#add-models-1): - - See list of available models in [Mistral AI Documentation](https://docs.mistral.ai/platform/endpoints). - - The `id` property must match the model name in the list. For example, if you want to use Mistral Large, you must set the `id` property to `mistral-large-latest` +Jan automatically includes Mistral's available models. In case you want to use a specific Mistral model +that you cannot find in **Jan**, follow the instructions in [Add Cloud Models](/docs/manage-models#add-models-1): +- See list of available models in [Mistral AI Documentation](https://docs.mistral.ai/platform/endpoints). +- The `id` property must match the model name in the list. For example, if you want to use +Mistral Large, you must set the `id` property to `mistral-large-latest` ## Troubleshooting @@ -84,4 +80,4 @@ Common issues and solutions: - Check if you're using the correct model ID - Verify your Mistral AI account has the necessary permissions -Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH) or check the [Mistral AI documentation](https://docs.mistral.ai/). \ No newline at end of file +Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH) or check the [Mistral AI documentation](https://docs.mistral.ai/). diff --git a/docs/src/pages/docs/remote-models/nvidia-nim.mdx b/docs/src/pages/docs/remote-models/nvidia-nim.mdx deleted file mode 100644 index fc4e26549..000000000 --- a/docs/src/pages/docs/remote-models/nvidia-nim.mdx +++ /dev/null @@ -1,91 +0,0 @@ ---- -title: NVIDIA -description: Learn how to integrate NVIDIA NIM with Jan for enhanced functionality. -keywords: - [ - Nvidia API, - Jan, - Jan AI, - ChatGPT alternative, - conversational AI, - large language model, - integration, - Nvidia integration, - API integration - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Settings, Plus } from 'lucide-react' - -# NVIDIA NIM - -Jan supports [Nvidia](https://www.nvidia.com/en-us/ai/) API integration, allowing you to use Nvidia's Large Language Models through Jan's interface. - - -Nvidia engine is only supported on Jan version 0.5.1 or later. - - -## Integrate Nvidia NIM API with Jan - - -### Step 1: Get Your API Key -1. Visit [Nvidia](https://docs.nvidia.com/nim/nemo-retriever/text-reranking/latest/getting-started.html#generate-an-api-key) and generate an API key -2. Copy your API key - - -Ensure your API key has sufficient credits - - -### Step 2: Configure Jan -There are two ways to add your Nvidia API keys in Jan: - -**Through Threads:** -1. In Threads, click **Model** tab in the **right sidebar** or **model selector** in input field -2. Once the selector is poped up, choose the **Cloud** tab -3. Click **Add** () icon next to **Nvidia** -4. Once you are directed to Nvidia settings, insert your **API Key** - -**Through Settings:** -1. Navigate to **Settings** () -2. Under **Remote Engines**, select **Nvidia** -3. Insert your **API Key** - -
-![NVIDIA NIM](../_assets/nvidia-nim.png) -
- -### Step 3: Start Using Nvidia NIM Models - -1. In any existing **Threads** or create a new one -2. Select a Nvidia model from **model selector** -3. Start chatting -
- -## Available Nvidia NIM Models - -Jan automatically includes Nvidia's available models. In case you want to use a specific model that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/models/manage-models#add-models-1): - - See list of available models in [Nvidia Documentation](https://build.nvidia.com/models). - - The `id` property must match the model name in the list. - -## Troubleshooting - -Common issues and solutions: - -**1. API Key Issues** -- Verify your API key is correct and not expired -- Check if you have billing set up on your Nvidia account -- Ensure you have access to the model you're trying to use - -**2. Connection Problems** -- Check your internet connection -- Verify Nvidia's system status -- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs) - -**3. Model Unavailable** -- Confirm your API key has access to the model -- Check if you're using the correct model ID -- Verify your Nvidia account has the necessary permissions -- Make sure you're using Jan version 0.5.1 or later - -Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH) or check the [Nvidia documentation](https://docs.nvidia.com/nim/large-language-models/latest/getting-started.html). \ No newline at end of file diff --git a/docs/src/pages/docs/remote-models/openai.mdx b/docs/src/pages/docs/remote-models/openai.mdx index f3cb63ef6..a2e733939 100644 --- a/docs/src/pages/docs/remote-models/openai.mdx +++ b/docs/src/pages/docs/remote-models/openai.mdx @@ -19,15 +19,18 @@ keywords: import { Callout, Steps } from 'nextra/components' import { Settings, Plus } from 'lucide-react' -# OpenAI +# OpenAI -Jan supports [OpenAI](https://openai.com/) and OpenAI-compatible APIs, allowing you to use all models from OpenAI (GPT-4, GPT o1 and more) through Jan's interface. +Jan supports most [OpenAI](https://openai.com/) as well as the plenty of OpenAI-compatible APIs out there, +allowing you to use all models from OpenAI (GPT-4o, o3 and even those from Together AI, DeepSeek, Fireworks +and more) through Jan's interface. ## Integrate OpenAI API with Jan -### Step 1: Get Your API Key -1. Visit [OpenAI Platform](https://platform.openai.com/api-keys) and sign in + +### Step 1: Get Your API Key +1. Visit the [OpenAI Platform](https://platform.openai.com/api-keys) and sign in 2. Create & copy a new API key or copy your existing one @@ -35,18 +38,10 @@ Ensure your API key has sufficient credits ### Step 2: Configure Jan -There are two ways to add your OpenAI API keys in Jan: -Through Threads: -1. In Threads, click Model tab in the right sidebar or model selector in input field -2. Once the selector is poped up, choose the Cloud tab -3. Click Add () icon next to OpenAI -4. Once you are directed to OpenAI settings, insert your API Key - -Through Settings: -1. Navigate to Settings () +1. Navigate to the Settings page () 2. Under Remote Engines, select OpenAI -3. Insert your API Key +3. Insert your API Key
![OpenAI](../_assets/openai.png) @@ -56,15 +51,18 @@ Through Settings: In any existing Threads or create a new one Select an OpenAI model from model selector -Start chatting +Start chatting
## Available OpenAI Models

-Jan automatically includes popular OpenAI models. In case you want to use a specific OpenAI model that you cannot find in Jan, follow instructions in [Add Cloud Models](/docs/models/manage-models#add-models-1):
 - See list of available models in [OpenAI Platform](https://platform.openai.com/docs/models/overview).
 - The id property must match the model name in the list. For example, if you want to use the [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo), you must set the id property to gpt-4-1106-preview.
+Jan automatically includes popular OpenAI models. In case you want to use a specific model that you
+cannot find in Jan, follow instructions in [Add Cloud Models](/docs/manage-models#add-models-1):
+- See list of available models in [OpenAI Platform](https://platform.openai.com/docs/models/overview).
+- The id property must match the model name in the list. For example, if you want to use
+[GPT-4.5](https://platform.openai.com/docs/models/), you must set the id property
+to the corresponding model ID.

## Troubleshooting

@@ -85,4 +83,5 @@ Common issues and solutions:

- Check if you're using the correct model ID
- Verify your OpenAI account has the necessary permissions

-Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH) or check the [OpenAI documentation](https://platform.openai.com/docs). \ No newline at end of file
+Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH) or check the
+[OpenAI documentation](https://platform.openai.com/docs).
diff --git a/docs/src/pages/docs/remote-models/openrouter.mdx b/docs/src/pages/docs/remote-models/openrouter.mdx
index a32de2423..186a504b9 100644
--- a/docs/src/pages/docs/remote-models/openrouter.mdx
+++ b/docs/src/pages/docs/remote-models/openrouter.mdx
@@ -24,13 +24,19 @@ import { Settings, Plus } from 'lucide-react'

## Integrate OpenRouter with Jan

-[OpenRouter](https://openrouter.ai/) is a tool that gathers AI models. Developers can utilize its API to engage with diverse large language models, generative image models, and generative 3D object models with a competitive pricing.
+[OpenRouter](https://openrouter.ai/) is a tool that gathers AI model APIs and provides access to all of them
+via a unified API. Developers can use the API to interact with LLMs, generative image models, and
+even models that generate 3D objects, all at competitive pricing.

-Jan supports OpenRouter API integration, allowing you to use models from various providers (Anthropic, Google, Meta and more) through a single API.
+Jan supports the OpenRouter API, allowing you to use models from various providers (Anthropic, Google,
+Meta and more) without having to get a separate API key from each of them.
+
+OpenRouter even offers a few free models! πŸ™Œ

## Integrate OpenRouter with Jan


### Step 1: Get Your API Key
1. Visit [OpenRouter](https://openrouter.ai/keys) and sign in
2. Create & copy a new API key or copy your existing one

@@ -40,17 +46,9 @@ Ensure your API key has sufficient credits. OpenRouter credits work across all a

### Step 2: Configure Jan
-There are two ways to add your OpenRouter key in Jan:

-**Through Threads:**
-1. In Threads, click **Model** tab in the **right sidebar** or **model selector** in input field
-2. Once the selector is poped up, choose the **Cloud** tab
-3. Click **Add** () icon next to **OpenRouter**
-4. Once you are directed to OpenRouter settings, insert your **API Key**
-
-**Through Settings:**
-1. Navigate to **Settings** ()
-2. 
Under **Remote Engines**, select **OpenRouter** +1. Navigate to the **Settings** page () +2. Under **Model Providers**, select **OpenRouter** 3. Insert your **API Key**
@@ -59,7 +57,7 @@ There are two ways to add your OpenRouter key in Jan: ### Step 3: Start Using OpenRouter Models -1. In any existing **Threads** or create a new one +1. Pick any existing **Chat** or create a new one 2. Select any model from **model selector** under OpenRouter 3. Start chatting
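Everything OpenRouter serves goes through one OpenAI-style endpoint, with the upstream provider encoded in the model ID. A minimal sketch of a direct request, handy for verifying your key and a model ID before using them in Jan:

```bash
curl https://openrouter.ai/api/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $OPENROUTER_API_KEY" \
  -d '{
    "model": "anthropic/claude-opus-4",
    "messages": [{"role": "user", "content": "Hello!"}]
  }'
```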
@@ -74,9 +72,9 @@ Jan automatically use your default OpenRouter's available models. For custom con - Available options can be found in [OpenRouter's Model Reference](https://openrouter.ai/models) **Examples of Model IDs:** -- Claude 3 Opus: `anthropic/claude-3-opus-20240229` -- Google Gemini Pro: `google/gemini-pro` -- Mistral Large: `mistralai/mistral-large` +- Claude 4 Opus: `anthropic/claude-opus-4` +- Google Gemini 2.5 Pro: `google/gemini-2.5-pro-preview` +- DeepSeek R1 Latest: `deepseek/deepseek-r1-0528` ## Troubleshooting @@ -97,4 +95,4 @@ Common issues and solutions: - Check if you're using the correct model ID format - Verify the model provider is currently operational -Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH) or check the [OpenRouter documentation](https://openrouter.ai/docs). \ No newline at end of file +Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH) or check the [OpenRouter documentation](https://openrouter.ai/docs). diff --git a/docs/src/pages/docs/remote-models/triton.mdx b/docs/src/pages/docs/remote-models/triton.mdx deleted file mode 100644 index 494882c42..000000000 --- a/docs/src/pages/docs/remote-models/triton.mdx +++ /dev/null @@ -1,57 +0,0 @@ ---- -title: Triton-TRT-LLM -description: Learn how to integrate Triton-TRT-LLM with Jan for enhanced functionality. -keywords: - [ - Triton-TRT-LLM API, - Jan, - Jan AI, - ChatGPT alternative, - conversational AI, - large language model, - integration, - Triton-TRT-LLM integration, - API integration - ] ---- - -import { Callout, Steps } from 'nextra/components' - -# Triton-TRT-LLM - -## How to Integrate Triton-TRT-LLM with Jan - -This guide provides step-by-step instructions on integrating Triton-TRT-LLM with Jan, enabling users to leverage Triton-TRT-LLM's capabilities within Jan's conversational interface. - - - Triton-TRT-LLM extension is only supported on Jan version 0.5.1 or later. - - -Before proceeding, ensure you have the following: -- Access to the Jan application -- Triton-TRT-LLM credentials - -## Integration Steps - -### Step 1: Configure Triton-TRT-LLM API Key -1. Run [Triton-TRT-LLM](https://github.com/npuichigo/openai_trtllm) to obtain the API Keys. -2. Copy your **Triton-TRT-LLM Key**. -3. Navigate to the **Jan app** > **Gear Icon (βš™οΈ)** > **Triton-TRT-LLM** section under Model Providers. -4. Insert your **Triton-TRT-LLM API Key**. - - -### Step 2: Start Chatting with the Model - -1. Select the model you want to use. -2. Specify the model's parameters. -3. Start the conversation with the model. - - - -## Troubleshooting - -If you encounter any issues during the integration process or while using Triton-TRT-LLM with Jan, consider the following troubleshooting steps: - -- Double-check your API credentials to ensure they are correct. -- Check for error messages or logs that may provide insight into the issue. -- Reach out to Triton-TRT-LLM support for assistance if needed. 
\ No newline at end of file diff --git a/docs/src/pages/integrations/coding/_assets/tabby-answer-engine.png b/docs/src/pages/docs/server-examples/_assets/tabby-answer-engine.png similarity index 100% rename from docs/src/pages/integrations/coding/_assets/tabby-answer-engine.png rename to docs/src/pages/docs/server-examples/_assets/tabby-answer-engine.png diff --git a/docs/src/pages/integrations/coding/_assets/tabby-chat-sidebar.png b/docs/src/pages/docs/server-examples/_assets/tabby-chat-sidebar.png similarity index 100% rename from docs/src/pages/integrations/coding/_assets/tabby-chat-sidebar.png rename to docs/src/pages/docs/server-examples/_assets/tabby-chat-sidebar.png diff --git a/docs/src/pages/integrations/coding/continue-dev.mdx b/docs/src/pages/docs/server-examples/continue-dev.mdx similarity index 100% rename from docs/src/pages/integrations/coding/continue-dev.mdx rename to docs/src/pages/docs/server-examples/continue-dev.mdx diff --git a/docs/src/pages/integrations/function-calling/interpreter.mdx b/docs/src/pages/docs/server-examples/interpreter.mdx similarity index 100% rename from docs/src/pages/integrations/function-calling/interpreter.mdx rename to docs/src/pages/docs/server-examples/interpreter.mdx diff --git a/docs/src/pages/integrations/messaging/llmcord.mdx b/docs/src/pages/docs/server-examples/llmcord.mdx similarity index 100% rename from docs/src/pages/integrations/messaging/llmcord.mdx rename to docs/src/pages/docs/server-examples/llmcord.mdx diff --git a/docs/src/pages/integrations/workflow-automation/n8n.mdx b/docs/src/pages/docs/server-examples/n8n.mdx similarity index 100% rename from docs/src/pages/integrations/workflow-automation/n8n.mdx rename to docs/src/pages/docs/server-examples/n8n.mdx diff --git a/docs/src/pages/integrations/coding/tabby.mdx b/docs/src/pages/docs/server-examples/tabby.mdx similarity index 100% rename from docs/src/pages/integrations/coding/tabby.mdx rename to docs/src/pages/docs/server-examples/tabby.mdx diff --git a/docs/src/pages/docs/threads.mdx b/docs/src/pages/docs/threads.mdx index 1c6fbd141..d92151f76 100644 --- a/docs/src/pages/docs/threads.mdx +++ b/docs/src/pages/docs/threads.mdx @@ -1,6 +1,6 @@ --- -title: Using Threads -description: Manage your interaction with AI locally. +title: Start Chatting +description: Manage your interaction with AI models locally. keywords: [ Jan, @@ -22,56 +22,58 @@ import { Callout } from 'nextra/components' import { SquarePen, Pencil, Ellipsis, Paintbrush, Trash2 } from 'lucide-react' -# Using Threads +# Chat with a Model -Jan organizes your AI conversations into threads, making it easy to track and revisit your interactions. This guide will help you effectively manage your chat history. +Jan organizes your conversations with a model into chats or threads, making it easy to track and revisit +your interactions. This guide will help you effectively manage your chat history. -## Creating New Thread -1. Click **New Chat** () icon on the bottom left of Jan -2. Select your preferred model in **Model Selector** in input field & start chatting +## Creating New Conversation/Thread +1. Click **New Chat** () icon on the +bottom left of Jan. +2. Select your preferred model in **Model Selector** in input field & start chatting.
![Create New Thread](./_assets/threads-new-chat.png)

-## View Threads History
+## View Your Chat History

-1. Once you open Jan, the default screen is **Threads**
+1. Once you open Jan, the default screen is **Chat**
 2. On the **left sidebar**, you can:
-- View **Thread List**, scroll through your threads history
-- Click any thread to open the full conversation
+  - View your **Conversations** and scroll through your history
+  - Click any chat to open the full conversation

## Favorites and Recents

-Jan helps you quickly access important and recent conversations with **Favorites** and **Recents** in the left sidebar:
-
-- **Favorites**: Pin threads you use often for instant access. Click the star icon in the context menu next to any thread to add or remove it from Favorites.
+Jan helps you quickly access important and recent conversations with **Favorites** and **Recents**
+in the left sidebar:
+- **Favorites**: Pin threads you use often for instant access. Click the **three dots** icon to the right of a
+thread and select the favorite option from the context menu.
 - **Recents**: See your most recently accessed threads for quick navigation.
![Favorites and Recents](./_assets/threads-favorites-and-recents.png)
-*Screenshot: The left sidebar showing Favorites and Recents sections for easy thread management.*

-## Edit Thread Title
-1. Navigate to the **Thread** that you want to edit title in left sidebar
-2. Hover on the thread and click on **three dots** () icon
-3. Select **Rename**
+## Edit a Chat Title
+1. Navigate to the **Conversation** whose title you want to edit in the left sidebar
+2. Hover over the conversation and click the **three dots** () icon
+3. Click **Rename**
 4. Add new title & save
![Context Menu](./_assets/threads-context-menu.png) - ## Delete Thread + -There's no undo for thread deletion, so make sure you want to remove the thread permanently. +There's no undo for chat deletion, so make sure you REALLY want to remove it permanently. -### Delete a specific thread + When you want to completely remove a thread: 1. Navigate to the **Thread** that you want to delete in left sidebar 2. Hover on the thread and click on **three dots** () icon -3. Select **Delete** +3. Click **Delete**
@@ -79,6 +81,6 @@ When you want to completely remove a thread: ### Delete all threads at once -In case you need to remove all threads at once: +In case you need to remove all conversations at once: 1. Hover on the `Recents` category and click on **three dots** () icon -2. Select **Delete All** \ No newline at end of file +2. Select **Delete All** diff --git a/docs/src/pages/docs/tools/retrieval.mdx b/docs/src/pages/docs/tools/retrieval.mdx deleted file mode 100644 index 50e589386..000000000 --- a/docs/src/pages/docs/tools/retrieval.mdx +++ /dev/null @@ -1,64 +0,0 @@ ---- -title: Knowledge Retrieval -description: A step-by-step guide to chat with PDF documents. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - use tools, - rag, - retrieval, - chat with pdf, - ] ---- - -import { Callout, Steps } from 'nextra/components' - -# Knowledge Retrieval -Chat with your documents and images using Jan's RAG (Retrieval-Augmented Generation) capability. - - - This feature is currently experimental and must be enabled through [Experimental Mode](/docs/settings#experimental-mode) in **Advanced Settings**. - - -## Enable File Search & Vision - -To chat with PDFs & images in Jan, follow these steps: - -1. In any **Thread**, click the **Tools** tab in right sidebar -2. Enable **Retrieval** - -
-![Retrieval](../_assets/retrieval-01.png) -
- -3. Once enabled, you should be able to **upload file(s) & image(s)** from threads input field - -Ensure that you are using a multimodal model. -- File Search: Jan currently supports PDF format -- Vision: only works with local models or [OpenAI](/docs/remote-models/openai) models for now - - -
-![Retrieval](../_assets/retrieval-02.png) -
- -## Knowledge Retrieval Parameters - -| Feature | Description | -|-----------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------| -| **Retrieval** | - Utilizes information from uploaded files, automatically retrieving content relevant to your queries for enhanced interaction.

- Use this for complex inquiries where context from uploaded documents significantly enhances response quality. | -| **Embedding Model** | - Converts text into numerical representations for machine understanding.

- Choose a model based on your needs and available resources, balancing accuracy and computational efficiency. | -| **Vector Database** | - Facilitates quick searches through stored numerical text representations to find relevant information efficiently.

- Optimize your vector database settings to ensure quick retrieval without sacrificing accuracy, particularly in applications with large data sets. | -| **Top K** | - Determines the number of top-ranked documents to retrieve, allowing control over search result relevance.

- Adjust this setting based on the precision needed. A lower value for more precise, focused searches and a higher value for broader, more comprehensive searches. | -| **Chunk Size** | - Sets the maximum number of tokens per data chunk, which is crucial for managing processing load and maintaining performance.

- Increase the chunk size for processing large blocks of text efficiently, or decrease it when dealing with smaller, more manageable texts to optimize memory usage. | -| **Chunk Overlap** | - Specifies the overlap in tokens between adjacent chunks to ensure continuous context in split text segments.

- Adjust the overlap to ensure smooth transitions in text analysis, with higher overlap for complex texts where context is critical. | -| **Retrieval Template**| - Defines the query structure using variables like `{CONTEXT}` and `{QUESTION}` to tailor searches to specific needs.

- Customize templates to closely align with your data's structure and the queries' nature, ensuring that retrievals are as relevant as possible. | - diff --git a/docs/src/pages/docs/troubleshooting.mdx b/docs/src/pages/docs/troubleshooting.mdx index ec3e31e50..8d793cf52 100644 --- a/docs/src/pages/docs/troubleshooting.mdx +++ b/docs/src/pages/docs/troubleshooting.mdx @@ -105,9 +105,9 @@ To resolve the issue where Jan is stuck in a broken build after installation: 1. **Uninstall** Jan - + Choose the appropriate method based on how you installed Jan: - + **For Debian/Ubuntu:** ``` sudo apt-get remove Jan @@ -120,7 +120,7 @@ To resolve the issue where Jan is stuck in a broken build after installation: # Default dir ~/.config/Jan # Custom installation directory - $XDG_CONFIG_HOME = /home/username/custom_config/Jan + $XDG_CONFIG_HOME = /home/username/custom_config/Jan ``` 3. If you are using a version before `0.4.2`, you need to run the following commands: @@ -206,7 +206,6 @@ To verify GPU acceleration is turned on:
![Hardware](./_assets/trouble-shooting-01.png) -![Hardware](./_assets/trouble-shooting-03.png)
@@ -347,7 +346,7 @@ If you check your [app logs](/docs/troubleshooting#how-to-get-error-logs) & see netstat -an | grep 39291 # Windows -netstat -ano | find "39291" +netstat -ano | find "39291" tasklist /fi "PID eq 39291" # Linux @@ -383,7 +382,7 @@ This will delete all your Jan data. The "Unexpected token" error usually relates to OpenAI API authentication or regional restrictions. **Step 1: API Key Sepup** -1. Get a valid API key from [OpenAI's developer platform](https://platform.openai.com/) +1. Get a valid API key from [OpenAI's developer platform](https://platform.openai.com/) 2. Ensure the key has sufficient credits & appropriate permissions **Step 2: Regional Access** @@ -399,4 +398,4 @@ If you can't find what you need in our troubleshooting guide, feel free reach ou Check the logs to ensure the information is what you intend to send. We retain your logs for only **24 hours**, so report any issues promptly. - \ No newline at end of file + diff --git a/docs/src/pages/integrations/_meta.json b/docs/src/pages/integrations/_meta.json deleted file mode 100644 index b5cfe9e0b..000000000 --- a/docs/src/pages/integrations/_meta.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "-- Switcher": { - "type": "separator", - "title": "Switcher" - }, - "index": { "title": "Overview", "href": "/integrations", "display": "hidden" }, - "coding": "Coding", - "function-calling": "Function Calling", - "messaging": "Messaging" -} diff --git a/docs/src/pages/integrations/index.mdx b/docs/src/pages/integrations/index.mdx deleted file mode 100644 index afd7ebbf0..000000000 --- a/docs/src/pages/integrations/index.mdx +++ /dev/null @@ -1,28 +0,0 @@ ---- -title: Discord -description: A step-by-step guide on integrating Jan with a Discord bot. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - Discord integration, - Discord, - bot, - ] ---- - -import { Callout, Steps } from 'nextra/components' -import { Cards, Card } from 'nextra/components' - - - - - - \ No newline at end of file diff --git a/docs/src/pages/privacy.mdx b/docs/src/pages/privacy.mdx index 2700953e0..8f18973a9 100644 --- a/docs/src/pages/privacy.mdx +++ b/docs/src/pages/privacy.mdx @@ -6,22 +6,27 @@ import { Callout } from 'nextra/components' # Privacy -Menlo Research is committed to protecting your privacy and ensuring that your personal information is handled safely and responsibly. This policy outlines how we collect, store, and use your personal information when you use any of our products. +Menlo Research is committed to protecting your privacy and ensuring that your personal information is handled safely and +responsibly. This policy outlines how we collect, store, and use your personal information when you use any of our products. ## Data Collection -Jan, Cortex, and all Menlo Research products do not collect personally identifying information. You can read about [our philosophy](/about#philosophy) here and audit our open-source codebases. +Jan, Cortex, and all Menlo Research products do not collect personally identifying information. You can read about +[our philosophy](/about#philosophy) here and audit our open-source codebases. ### When you voluntarily provide data -We -do- collect personal information you voluntarily provide us, e.g., when you sign up for our newsletter, join our Discord, or contact us via email. 
+We -do- collect personal information you voluntarily provide us, e.g., when you sign up for our newsletter, join our +Discord, or contact us via email. ### Jan -Jan runs with privacy by default and is used 100% offline on your own computer. Your data (e.g., conversation history, usage logs) are stored locally and never leave your computer. +Jan runs with privacy by default and is used 100% offline on your own computer. Your data (e.g., conversation history, +usage logs) are stored locally and never leave your computer. -If you use a Remote AI API (e.g., OpenAI API, Groq API), your data will naturally travel to their servers. They will be subject to the privacy policy of the respective API provider. +If you use a Remote AI API (e.g., OpenAI API, Groq API), your data will naturally travel to their servers. They will be +subject to the privacy policy of the respective API provider. ### Cortex @@ -30,16 +35,19 @@ Cortex is a library that runs large language models (LLMs) locally on your compu ## Data Sharing -We do not share your personal information with third parties except as required by law or as necessary to provide you with the services you have requested. +We do not share your personal information with third parties except as required by law or as necessary to provide you with +the services you have requested. ## Data Security -We take the security of your personal information seriously and have implemented appropriate technical and organizational measures to protect your personal information from unauthorized access, disclosure, or misuse. +We take the security of your personal information seriously and have implemented appropriate technical and organizational +measures to protect your personal information from unauthorized access, disclosure, or misuse. ## Your Choices -You have the right to access, update, and delete your personal information at any time. If you subscribed to our newsletter, you may also opt-out of receiving marketing communications from us by following the unsubscribe link included in our emails. +You have the right to access, update, and delete your personal information at any time. If you subscribed to our newsletter, +you may also opt-out of receiving marketing communications from us by following the unsubscribe link included in our emails. ## Contact Us -If you have any questions or concerns about our privacy policy, please contact us at hello@jan.ai. +If you have any questions or concerns about our privacy policy, please contact us at hello@menlo.ai. diff --git a/docs/theme.config.tsx b/docs/theme.config.tsx index b030fb97c..b09d9c492 100644 --- a/docs/theme.config.tsx +++ b/docs/theme.config.tsx @@ -65,49 +65,6 @@ const config: DocsThemeConfig = {
), }, - sidebar: { - titleComponent: ({ type, title }) => { - // eslint-disable-next-line react-hooks/rules-of-hooks - const { asPath } = useRouter() - if (type === 'separator' && title === 'Switcher') { - return ( -
- {[ - { title: 'Jan', path: '/docs', Icon: LibraryBig }, - { - title: 'Cortex', - path: 'https://cortex.so/docs/', - Icon: BrainCircuit, - }, - { title: 'Integrations', path: '/integrations', Icon: Blocks }, - ].map((item) => - asPath.startsWith(item.path) ? ( -
- - {item.title} -
- ) : ( - - - {item.title} - - ) - )} -
- ) - } - return title - }, - defaultMenuCollapseLevel: 1, - toggleButton: true, - }, toc: { backToTop: true, },