Merge branch 'dev' into docs-add-integration-lmstudio

2024-02-20 12:47:55 +07:00 · 2024-02-20 12:47:55 +07:00 · ad42b393e2
commit ad42b393e2
parent a1c1db6d88 6966c5d0ef
19 changed files with 280 additions and 114 deletions
--- a/16
+++ b/16
@ -14,9 +14,6 @@ COPY . ./
 RUN export NITRO_VERSION=$(cat extensions/inference-nitro-extension/bin/version.txt) && \
    jq --arg nitroVersion $NITRO_VERSION '(.scripts."downloadnitro:linux" | gsub("\\${NITRO_VERSION}"; $nitroVersion)) | gsub("\r"; "")' extensions/inference-nitro-extension/package.json > /tmp/newcommand.txt && export NEW_COMMAND=$(sed 's/^"//;s/"$//' /tmp/newcommand.txt) && jq --arg newCommand "$NEW_COMMAND" '.scripts."downloadnitro:linux" = $newCommand' extensions/inference-nitro-extension/package.json > /tmp/package.json && mv /tmp/package.json extensions/inference-nitro-extension/package.json
 RUN make install-and-build
-RUN yarn workspace jan-web install
-
-RUN export NODE_ENV=production && yarn workspace jan-web build

 # # 2. Rebuild the source code only when needed
 FROM base AS runner
@ -42,12 +39,13 @@ COPY --from=builder /app/docs/openapi ./docs/openapi/
 COPY --from=builder /app/pre-install ./pre-install/

 # Copy the package.json, yarn.lock, and output of web yarn space to leverage Docker cache
-COPY --from=builder /app/web/out ./web/out/
-COPY --from=builder /app/web/.next ./web/.next/
-COPY --from=builder /app/web/package.json ./web/package.json
-COPY --from=builder /app/web/yarn.lock ./web/yarn.lock
+COPY --from=builder /app/uikit ./uikit/
+COPY --from=builder /app/web ./web/
 COPY --from=builder /app/models ./models/

+RUN yarn workspace @janhq/uikit install && yarn workspace @janhq/uikit build
+RUN yarn workspace jan-web install
+
 RUN npm install -g serve@latest

 EXPOSE 1337 3000 3928
@ -55,7 +53,9 @@ EXPOSE 1337 3000 3928
 ENV JAN_API_HOST 0.0.0.0
 ENV JAN_API_PORT 1337

-CMD ["sh", "-c", "cd server && node build/main.js & cd web && npx serve out"]
+ENV API_BASE_URL http://localhost:1337
+
+CMD ["sh", "-c", "export NODE_ENV=production && yarn workspace jan-web build && cd web && npx serve out & cd server && node build/main.js"]

 # docker build -t jan .
 # docker run -p 1337:1337 -p 3000:3000 -p 3928:3928 jan
--- a/Dockerfile.gpu
+++ b/Dockerfile.gpu
@ -28,9 +28,6 @@ COPY . ./
 RUN export NITRO_VERSION=$(cat extensions/inference-nitro-extension/bin/version.txt) && \
    jq --arg nitroVersion $NITRO_VERSION '(.scripts."downloadnitro:linux" | gsub("\\${NITRO_VERSION}"; $nitroVersion)) | gsub("\r"; "")' extensions/inference-nitro-extension/package.json > /tmp/newcommand.txt && export NEW_COMMAND=$(sed 's/^"//;s/"$//' /tmp/newcommand.txt) && jq --arg newCommand "$NEW_COMMAND" '.scripts."downloadnitro:linux" = $newCommand' extensions/inference-nitro-extension/package.json > /tmp/package.json && mv /tmp/package.json extensions/inference-nitro-extension/package.json
 RUN make install-and-build
-RUN yarn workspace jan-web install
-
-RUN export NODE_ENV=production && yarn workspace jan-web build

 # # 2. Rebuild the source code only when needed
 FROM base AS runner
@ -66,12 +63,13 @@ COPY --from=builder /app/docs/openapi ./docs/openapi/
 COPY --from=builder /app/pre-install ./pre-install/

 # Copy the package.json, yarn.lock, and output of web yarn space to leverage Docker cache
-COPY --from=builder /app/web/out ./web/out/
-COPY --from=builder /app/web/.next ./web/.next/
-COPY --from=builder /app/web/package.json ./web/package.json
-COPY --from=builder /app/web/yarn.lock ./web/yarn.lock
+COPY --from=builder /app/uikit ./uikit/
+COPY --from=builder /app/web ./web/
 COPY --from=builder /app/models ./models/

+RUN yarn workspace @janhq/uikit install && yarn workspace @janhq/uikit build
+RUN yarn workspace jan-web install
+
 RUN npm install -g serve@latest

 EXPOSE 1337 3000 3928
@ -81,7 +79,9 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda/targets/x86_64-linux/lib:/usr/local/cuda-12.
 ENV JAN_API_HOST 0.0.0.0
 ENV JAN_API_PORT 1337

-CMD ["sh", "-c", "cd server && node build/main.js & cd web && npx serve out"]
+ENV API_BASE_URL http://localhost:1337
+
+CMD ["sh", "-c", "export NODE_ENV=production && yarn workspace jan-web build && cd web && npx serve out & cd server && node build/main.js"]

 # pre-requisites: nvidia-docker
 # docker build -t jan-gpu . -f Dockerfile.gpu
--- a/12
+++ b/12
@ -52,18 +52,28 @@ build: check-file-counts

 clean:
 ifeq ($(OS),Windows_NT)
-	powershell -Command "Get-ChildItem -Path . -Include node_modules, .next, dist -Recurse -Directory | Remove-Item -Recurse -Force"
+	powershell -Command "Get-ChildItem -Path . -Include node_modules, .next, dist, build, out -Recurse -Directory | Remove-Item -Recurse -Force"
+	powershell -Command "Remove-Item -Recurse -Force ./pre-install/*.tgz"
+	powershell -Command "Remove-Item -Recurse -Force ./electron/pre-install/*.tgz"
 	rmdir /s /q "%USERPROFILE%\jan\extensions"
 else ifeq ($(shell uname -s),Linux)
 	find . -name "node_modules" -type d -prune -exec rm -rf '{}' +
 	find . -name ".next" -type d -exec rm -rf '{}' +
 	find . -name "dist" -type d -exec rm -rf '{}' +
+	find . -name "build" -type d -exec rm -rf '{}' +
+	find . -name "out" -type d -exec rm -rf '{}' +
+	rm -rf ./pre-install/*.tgz
+	rm -rf ./electron/pre-install/*.tgz
 	rm -rf "~/jan/extensions"
 	rm -rf "~/.cache/jan*"
 else
 	find . -name "node_modules" -type d -prune -exec rm -rf '{}' +
 	find . -name ".next" -type d -exec rm -rf '{}' +
 	find . -name "dist" -type d -exec rm -rf '{}' +
+	find . -name "build" -type d -exec rm -rf '{}' +
+	find . -name "out" -type d -exec rm -rf '{}' +
+	rm -rf ./pre-install/*.tgz
+	rm -rf ./electron/pre-install/*.tgz
 	rm -rf ~/jan/extensions
 	rm -rf ~/Library/Caches/jan*
 endif
--- a/README.md
+++ b/README.md
@ -76,31 +76,31 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute
  <tr style="text-align:center">
    <td style="text-align:center"><b>Experimental (Nightly Build)</b></td>
    <td style="text-align:center">
-      <a href='https://delta.jan.ai/latest/jan-win-x64-0.4.6-275.exe'>
+      <a href='https://delta.jan.ai/latest/jan-win-x64-0.4.6-276.exe'>
        <img src='./docs/static/img/windows.png' style="height:14px; width: 14px" />
        <b>jan.exe</b>
      </a>
    </td>
    <td style="text-align:center">
-      <a href='https://delta.jan.ai/latest/jan-mac-x64-0.4.6-275.dmg'>
+      <a href='https://delta.jan.ai/latest/jan-mac-x64-0.4.6-276.dmg'>
        <img src='./docs/static/img/mac.png' style="height:15px; width: 15px" />
        <b>Intel</b>
      </a>
    </td>
    <td style="text-align:center">
-      <a href='https://delta.jan.ai/latest/jan-mac-arm64-0.4.6-275.dmg'>
+      <a href='https://delta.jan.ai/latest/jan-mac-arm64-0.4.6-276.dmg'>
        <img src='./docs/static/img/mac.png' style="height:15px; width: 15px" />
        <b>M1/M2</b>
      </a>
    </td>
    <td style="text-align:center">
-      <a href='https://delta.jan.ai/latest/jan-linux-amd64-0.4.6-275.deb'>
+      <a href='https://delta.jan.ai/latest/jan-linux-amd64-0.4.6-276.deb'>
        <img src='./docs/static/img/linux.png' style="height:14px; width: 14px" />
        <b>jan.deb</b>
      </a>
    </td>
    <td style="text-align:center">
-      <a href='https://delta.jan.ai/latest/jan-linux-x86_64-0.4.6-275.AppImage'>
+      <a href='https://delta.jan.ai/latest/jan-linux-x86_64-0.4.6-276.AppImage'>
        <img src='./docs/static/img/linux.png' style="height:14px; width: 14px" />
        <b>jan.AppImage</b>
      </a>
@ -235,61 +235,84 @@ This will build the app MacOS m1/m2 for production (with code signing already do

 - Run Jan in Docker mode

-  - **Option 1**: Run Jan in CPU mode
+| Docker compose Profile | Description                                  |
+| ---------------------- | -------------------------------------------- |
+| `cpu-fs`               | Run Jan in CPU mode with default file system |
+| `cpu-s3fs`             | Run Jan in CPU mode with S3 file system      |
+| `gpu-fs`               | Run Jan in GPU mode with default file system |
+| `gpu-s3fs`             | Run Jan in GPU mode with S3 file system      |
+
+| Environment Variable    | Description                                                                                             |
+| ----------------------- | ------------------------------------------------------------------------------------------------------- |
+| `S3_BUCKET_NAME`        | S3 bucket name - leave blank for default file system                                                    |
+| `AWS_ACCESS_KEY_ID`     | AWS access key ID - leave blank for default file system                                                 |
+| `AWS_SECRET_ACCESS_KEY` | AWS secret access key - leave blank for default file system                                             |
+| `AWS_ENDPOINT`          | AWS endpoint URL - leave blank for default file system                                                  |
+| `AWS_REGION`            | AWS region - leave blank for default file system                                                        |
+| `API_BASE_URL`          | Jan Server URL; change it to your public IP address or domain name (default: http://localhost:1337)     |
+
+- **Option 1**: Run Jan in CPU mode
+
+  ```bash
+  # cpu mode with default file system
+  docker compose --profile cpu-fs up -d
+
+  # cpu mode with S3 file system
+  docker compose --profile cpu-s3fs up -d
+  ```
+
+- **Option 2**: Run Jan in GPU mode
+
+  - **Step 1**: Check CUDA compatibility with your NVIDIA driver by running `nvidia-smi` and check the CUDA version in the output

    ```bash
-    docker compose --profile cpu up -d
+    nvidia-smi
+
+    # Output
+    +---------------------------------------------------------------------------------------+
+    | NVIDIA-SMI 531.18                 Driver Version: 531.18       CUDA Version: 12.1     |
+    |-----------------------------------------+----------------------+----------------------+
+    | GPU  Name                      TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
+    | Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
+    |                                         |                      |               MIG M. |
+    |=========================================+======================+======================|
+    |   0  NVIDIA GeForce RTX 4070 Ti    WDDM | 00000000:01:00.0  On |                  N/A |
+    |  0%   44C    P8               16W / 285W|   1481MiB / 12282MiB |      2%      Default |
+    |                                         |                      |                  N/A |
+    +-----------------------------------------+----------------------+----------------------+
+    |   1  NVIDIA GeForce GTX 1660 Ti    WDDM | 00000000:02:00.0 Off |                  N/A |
+    |  0%   49C    P8               14W / 120W|      0MiB /  6144MiB |      0%      Default |
+    |                                         |                      |                  N/A |
+    +-----------------------------------------+----------------------+----------------------+
+    |   2  NVIDIA GeForce GTX 1660 Ti    WDDM | 00000000:05:00.0 Off |                  N/A |
+    | 29%   38C    P8               11W / 120W|      0MiB /  6144MiB |      0%      Default |
+    |                                         |                      |                  N/A |
+    +-----------------------------------------+----------------------+----------------------+
+
+    +---------------------------------------------------------------------------------------+
+    | Processes:                                                                            |
+    |  GPU   GI   CI        PID   Type   Process name                            GPU Memory |
+    |        ID   ID                                                             Usage      |
+    |=======================================================================================|
    ```

-  - **Option 2**: Run Jan in GPU mode
+  - **Step 2**: Visit [NVIDIA NGC Catalog](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags) and find the smallest minor version of the image tag that matches your CUDA version (e.g., 12.1 -> 12.1.0)

-    - **Step 1**: Check CUDA compatibility with your NVIDIA driver by running `nvidia-smi` and check the CUDA version in the output
+  - **Step 3**: Update the `Dockerfile.gpu` line number 5 with the latest minor version of the image tag from step 2 (e.g. change `FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base` to `FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04 AS base`)

-      ```bash
-      nvidia-smi
+  - **Step 4**: Run command to start Jan in GPU mode

-      # Output
-      +---------------------------------------------------------------------------------------+
-      | NVIDIA-SMI 531.18                 Driver Version: 531.18       CUDA Version: 12.1     |
-      |-----------------------------------------+----------------------+----------------------+
-      | GPU  Name                      TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
-      | Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
-      |                                         |                      |               MIG M. |
-      |=========================================+======================+======================|
-      |   0  NVIDIA GeForce RTX 4070 Ti    WDDM | 00000000:01:00.0  On |                  N/A |
-      |  0%   44C    P8               16W / 285W|   1481MiB / 12282MiB |      2%      Default |
-      |                                         |                      |                  N/A |
-      +-----------------------------------------+----------------------+----------------------+
-      |   1  NVIDIA GeForce GTX 1660 Ti    WDDM | 00000000:02:00.0 Off |                  N/A |
-      |  0%   49C    P8               14W / 120W|      0MiB /  6144MiB |      0%      Default |
-      |                                         |                      |                  N/A |
-      +-----------------------------------------+----------------------+----------------------+
-      |   2  NVIDIA GeForce GTX 1660 Ti    WDDM | 00000000:05:00.0 Off |                  N/A |
-      | 29%   38C    P8               11W / 120W|      0MiB /  6144MiB |      0%      Default |
-      |                                         |                      |                  N/A |
-      +-----------------------------------------+----------------------+----------------------+
+    ```bash
+    # GPU mode with default file system
+    docker compose --profile gpu-fs up -d

-      +---------------------------------------------------------------------------------------+
-      | Processes:                                                                            |
-      |  GPU   GI   CI        PID   Type   Process name                            GPU Memory |
-      |        ID   ID                                                             Usage      |
-      |=======================================================================================|
-      ```
+    # GPU mode with S3 file system
+    docker compose --profile gpu-s3fs up -d
+    ```

-    - **Step 2**: Visit [NVIDIA NGC Catalog ](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags) and find the smallest minor version of image tag that matches your CUDA version (e.g., 12.1 -> 12.1.0)
+This will start the web server and you can access Jan at `http://localhost:3000`.

-    - **Step 3**: Update the `Dockerfile.gpu` line number 5 with the latest minor version of the image tag from step 2 (e.g. change `FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base` to `FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04 AS base`)
-
-    - **Step 4**: Run command to start Jan in GPU mode
-
-      ```bash
-      # GPU mode
-      docker compose --profile gpu up -d
-      ```
-
-  This will start the web server and you can access Jan at `http://localhost:3000`.
-
-  > Note: Currently, Docker mode is only work for development and localhost, production is not supported yet. RAG feature is not supported in Docker mode yet.
+> Note: The RAG feature is not yet supported in Docker mode with s3fs.

 ## Acknowledgements

--- a/core/src/node/api/processors/app.ts
+++ b/core/src/node/api/processors/app.ts
@ -1,6 +1,5 @@
 import { basename, isAbsolute, join, relative } from 'path'

-import { AppRoute } from '../../../api'
 import { Processor } from './Processor'
 import { getAppConfigurations as appConfiguration, updateAppConfiguration } from '../../helper'
 import { log as writeLog, logServer as writeServerLog } from '../../helper/log'
--- a/core/src/node/api/restful/common.ts
+++ b/core/src/node/api/restful/common.ts
@ -8,7 +8,7 @@ import {
  createMessage,
  createThread,
  getMessages,
-  retrieveMesasge,
+  retrieveMessage,
  updateThread,
 } from './helper/builder'

@ -17,10 +17,18 @@ import { startModel, stopModel } from './helper/startStopModel'
 import { ModelSettingParams } from '../../../types'

 export const commonRouter = async (app: HttpServer) => {
+  const normalizeData = (data: any) => {
+    return {
+      object: 'list',
+      data,
+    }
+  }
  // Common Routes
  // Read & Delete :: Threads | Models | Assistants
  Object.keys(JanApiRouteConfiguration).forEach((key) => {
-    app.get(`/${key}`, async (_request) => getBuilder(JanApiRouteConfiguration[key]))
+    app.get(`/${key}`, async (_request) =>
+      getBuilder(JanApiRouteConfiguration[key]).then(normalizeData)
+    )

    app.get(`/${key}/:id`, async (request: any) =>
      retrieveBuilder(JanApiRouteConfiguration[key], request.params.id)
@ -34,10 +42,12 @@ export const commonRouter = async (app: HttpServer) => {
  // Threads
  app.post(`/threads/`, async (req, res) => createThread(req.body))

-  app.get(`/threads/:threadId/messages`, async (req, res) => getMessages(req.params.threadId))
+  app.get(`/threads/:threadId/messages`, async (req, res) =>
+    getMessages(req.params.threadId).then(normalizeData)
+  )

  app.get(`/threads/:threadId/messages/:messageId`, async (req, res) =>
-    retrieveMesasge(req.params.threadId, req.params.messageId)
+    retrieveMessage(req.params.threadId, req.params.messageId)
  )

  app.post(`/threads/:threadId/messages`, async (req, res) =>
--- a/core/src/node/api/restful/helper/builder.ts
+++ b/core/src/node/api/restful/helper/builder.ts
@ -125,7 +125,7 @@ export const getMessages = async (threadId: string): Promise<ThreadMessage[]> =>
  }
 }

-export const retrieveMesasge = async (threadId: string, messageId: string) => {
+export const retrieveMessage = async (threadId: string, messageId: string) => {
  const messages = await getMessages(threadId)
  const filteredMessages = messages.filter((m) => m.id === messageId)
  if (!filteredMessages || filteredMessages.length === 0) {
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -42,10 +42,10 @@ services:
      vpcbr:

  # app_cpu service for running the CPU version of the application
-  app_cpu:
+  app_cpu_s3fs:
    image: jan:latest
    volumes:
-      - app_data:/app/server/build/jan
+      - app_data_cpu_s3fs:/app/server/build/jan
    build:
      context: .
      dockerfile: Dockerfile
@ -56,9 +56,10 @@ services:
      S3_BUCKET_NAME: mybucket
      AWS_ENDPOINT: http://10.5.0.2:9000
      AWS_REGION: us-east-1
+      API_BASE_URL: http://localhost:1337
    restart: always
    profiles:
-      - cpu
+      - cpu-s3fs
    ports:
      - "3000:3000"
      - "1337:1337"
@ -68,7 +69,7 @@ services:
        ipv4_address: 10.5.0.3

  # app_gpu service for running the GPU version of the application
-  app_gpu:
+  app_gpu_s3fs:
    deploy:
      resources:
        reservations:
@ -78,7 +79,7 @@ services:
            capabilities: [gpu]
    image: jan-gpu:latest
    volumes:
-      - app_data:/app/server/build/jan
+      - app_data_gpu_s3fs:/app/server/build/jan
    build:
      context: .
      dockerfile: Dockerfile.gpu
@ -90,8 +91,9 @@ services:
      S3_BUCKET_NAME: mybucket
      AWS_ENDPOINT: http://10.5.0.2:9000
      AWS_REGION: us-east-1
+      API_BASE_URL: http://localhost:1337
    profiles:
-      - gpu
+      - gpu-s3fs
    ports:
      - "3000:3000"
      - "1337:1337"
@ -100,9 +102,60 @@ services:
      vpcbr:
        ipv4_address: 10.5.0.4

+  app_cpu_fs:
+    image: jan:latest
+    volumes:
+      - app_data_cpu_fs:/app/server/build/jan
+    build:
+      context: .
+      dockerfile: Dockerfile
+    environment:
+      API_BASE_URL: http://localhost:1337
+    restart: always
+    profiles:
+      - cpu-fs
+    ports:
+      - "3000:3000"
+      - "1337:1337"
+      - "3928:3928"
+    networks:
+      vpcbr:
+        ipv4_address: 10.5.0.5
+
+  # app_gpu_fs service for running the GPU version of the application with the default file system
+  app_gpu_fs:
+    deploy:
+      resources:
+        reservations:
+          devices:
+          - driver: nvidia
+            count: all
+            capabilities: [gpu]
+    image: jan-gpu:latest
+    volumes:
+      - app_data_gpu_fs:/app/server/build/jan
+    build:
+      context: .
+      dockerfile: Dockerfile.gpu
+    restart: always
+    environment:
+      API_BASE_URL: http://localhost:1337
+    profiles:
+      - gpu-fs
+    ports:
+      - "3000:3000"
+      - "1337:1337"
+      - "3928:3928"
+    networks:
+      vpcbr:
+        ipv4_address: 10.5.0.6
+
 volumes:
  minio_data:
-  app_data:
+  app_data_cpu_s3fs:
+  app_data_gpu_s3fs:
+  app_data_cpu_fs:
+  app_data_gpu_fs:

 networks:
  vpcbr:
@ -113,5 +166,7 @@ networks:
         gateway: 10.5.0.1

 # Usage:
-# - Run 'docker-compose --profile cpu up -d' to start the app_cpu service
-# - Run 'docker-compose --profile gpu up -d' to start the app_gpu service
+# - Run 'docker compose --profile cpu-s3fs up -d' to start the app_cpu_s3fs service
+# - Run 'docker compose --profile gpu-s3fs up -d' to start the app_gpu_s3fs service
+# - Run 'docker compose --profile cpu-fs up -d' to start the app_cpu_fs service
+# - Run 'docker compose --profile gpu-fs up -d' to start the app_gpu_fs service
--- a/extensions/inference-nitro-extension/src/index.ts
+++ b/extensions/inference-nitro-extension/src/index.ts
@ -88,7 +88,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
    const electronApi = window?.electronAPI
    this.inferenceUrl = INFERENCE_URL
    if (!electronApi) {
-      this.inferenceUrl = JAN_SERVER_INFERENCE_URL
+      this.inferenceUrl = `${window.core?.api?.baseApiUrl}/v1/chat/completions`
    }
    console.debug('Inference url: ', this.inferenceUrl)

--- a/extensions/model-extension/package.json
+++ b/extensions/model-extension/package.json
@ -1,6 +1,6 @@
 {
  "name": "@janhq/model-extension",
-  "version": "1.0.24",
+  "version": "1.0.25",
  "description": "Model Management Extension provides model exploration and seamless downloads",
  "main": "dist/index.js",
  "module": "dist/module.js",
--- a/models/dolphin-phi-2/model.json
+++ b/models/dolphin-phi-2/model.json
@ -1,5 +1,5 @@
 {
-    "source": [
+    "sources": [
      {
        "url": "https://huggingface.co/TheBloke/dolphin-2_6-phi-2-GGUF/resolve/main/dolphin-2_6-phi-2.Q8_0.gguf",
        "filename": "dolphin-2_6-phi-2.Q8_0.gguf"
--- a/models/llamacorn-1.1b/model.json
+++ b/models/llamacorn-1.1b/model.json
@ -1,5 +1,5 @@
 {
-    "source": [
+    "sources": [
        {
            "url":"https://huggingface.co/janhq/llamacorn-1.1b-chat-GGUF/resolve/main/llamacorn-1.1b-chat.Q8_0.gguf",
            "filename": "llamacorn-1.1b-chat.Q8_0.gguf"
--- a/models/stable-zephyr-3b/model.json
+++ b/models/stable-zephyr-3b/model.json
@ -1,5 +1,5 @@
 {
-    "source": [
+    "sources": [
      {
        "url": "https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF/resolve/main/stablelm-zephyr-3b.Q8_0.gguf",
        "filename": "stablelm-zephyr-3b.Q8_0.gguf"
--- a/server/helpers/setup.ts
+++ b/server/helpers/setup.ts
@ -31,6 +31,32 @@ export async function setup() {
    'utf-8'
  )

+  if (!existsSync(join(appDir, 'settings'))) {
+    console.debug('Writing nvidia config file...')
+    mkdirSync(join(appDir, 'settings'))
+    writeFileSync(
+      join(appDir, 'settings', 'settings.json'),
+      JSON.stringify(
+        {
+          notify: true,
+          run_mode: 'cpu',
+          nvidia_driver: {
+            exist: false,
+            version: '',
+          },
+          cuda: {
+            exist: false,
+            version: '',
+          },
+          gpus: [],
+          gpu_highest_vram: '',
+          gpus_in_use: [],
+          is_initial: true,
+        }),
+      'utf-8'
+    )
+  }
+
  /**
   * Install extensions
   */
--- a/server/index.ts
+++ b/server/index.ts
@ -7,6 +7,7 @@ import {
  getJanExtensionsPath,
 } from '@janhq/core/node'
 import { join } from 'path'
+import tcpPortUsed from 'tcp-port-used'

 // Load environment variables
 dotenv.config()
@ -46,6 +47,15 @@ export interface ServerConfig {
 * @param configs - Server configurations
 */
 export const startServer = async (configs?: ServerConfig): Promise<boolean> => {
+  if (configs?.port && configs?.host) {
+    const inUse = await tcpPortUsed.check(Number(configs.port), configs.host)
+    if (inUse) {
+      const errorMessage = `Port ${configs.port} is already in use.`
+      logServer(errorMessage)
+      throw new Error(errorMessage)
+    }
+  }
+
  // Update server settings
  isVerbose = configs?.isVerboseEnabled ?? true
  hostSetting = configs?.host ?? JAN_API_HOST
--- a/web/extension/ExtensionManager.ts
+++ b/web/extension/ExtensionManager.ts
@ -83,7 +83,10 @@ export class ExtensionManager {
    // Import class
    const extensionUrl = window.electronAPI
      ? extension.url
-      : extension.url.replace('extension://', `${API_BASE_URL}/extensions/`)
+      : extension.url.replace(
+          'extension://',
+          `${window.core?.api?.baseApiUrl ?? ''}/extensions/`
+        )
    await import(/* webpackIgnore: true */ extensionUrl).then(
      (extensionClass) => {
        // Register class if it has a default export
--- a/web/next.config.js
+++ b/web/next.config.js
@ -6,6 +6,11 @@ const webpack = require('webpack')
 const packageJson = require('./package.json')

 const nextConfig = {
+  eslint: {
+    // Warning: This allows production builds to successfully complete even if
+    // your project has ESLint errors.
+    ignoreDuringBuilds: true,
+  },
  output: 'export',
  assetPrefix: '.',
  images: {
--- a/web/screens/LocalServer/index.tsx
+++ b/web/screens/LocalServer/index.tsx
@ -39,6 +39,8 @@ import ModalTroubleShooting, {
 } from '@/containers/ModalTroubleShoot'
 import ServerLogs from '@/containers/ServerLogs'

+import { toaster } from '@/containers/Toast'
+
 import { loadModelErrorAtom, useActiveModel } from '@/hooks/useActiveModel'
 import { useLogs } from '@/hooks/useLogs'

@ -106,6 +108,45 @@ const LocalServerScreen = () => {
    handleChangePort(port)
  }, [handleChangePort, port])

+  const onStartServerClick = async () => {
+    if (selectedModel == null) return
+    try {
+      const isStarted = await window.core?.api?.startServer({
+        host,
+        port,
+        isCorsEnabled,
+        isVerboseEnabled,
+      })
+      await startModel(selectedModel.id)
+      if (isStarted) setServerEnabled(true)
+      if (firstTimeVisitAPIServer) {
+        localStorage.setItem(FIRST_TIME_VISIT_API_SERVER, 'false')
+        setFirstTimeVisitAPIServer(false)
+      }
+    } catch (e) {
+      console.error(e)
+      toaster({
+        title: `Failed to start server!`,
+        description: 'Please check Server Logs for more details.',
+        type: 'error',
+      })
+    }
+  }
+
+  const onStopServerClick = async () => {
+    window.core?.api?.stopServer()
+    setServerEnabled(false)
+    setLoadModelError(undefined)
+  }
+
+  const onToggleServer = async () => {
+    if (serverEnabled) {
+      await onStopServerClick()
+    } else {
+      await onStartServerClick()
+    }
+  }
+
  return (
    <div className="flex h-full w-full" data-testid="local-server-testid">
      {/* Left SideBar */}
@ -122,26 +163,7 @@ const LocalServerScreen = () => {
              block
              themes={serverEnabled ? 'danger' : 'primary'}
              disabled={stateModel.loading || errorRangePort || !selectedModel}
-              onClick={async () => {
-                if (serverEnabled) {
-                  window.core?.api?.stopServer()
-                  setServerEnabled(false)
-                  setLoadModelError(undefined)
-                } else {
-                  startModel(String(selectedModel?.id))
-                  const isStarted = await window.core?.api?.startServer({
-                    host,
-                    port,
-                    isCorsEnabled,
-                    isVerboseEnabled,
-                  })
-                  if (isStarted) setServerEnabled(true)
-                  if (firstTimeVisitAPIServer) {
-                    localStorage.setItem(FIRST_TIME_VISIT_API_SERVER, 'false')
-                    setFirstTimeVisitAPIServer(false)
-                  }
-                }
-              }}
+              onClick={onToggleServer}
            >
              {serverEnabled ? 'Stop' : 'Start'} Server
            </Button>
--- a/web/services/restService.ts
+++ b/web/services/restService.ts
@ -18,11 +18,14 @@ export const restAPI = {
      ...acc,
      [proxy.route]: (...args: any) => {
        // For each route, define a function that sends a request to the API
-        return fetch(`${API_BASE_URL}/v1/${proxy.path}/${proxy.route}`, {
-          method: 'POST',
-          body: JSON.stringify(args),
-          headers: { contentType: 'application/json' },
-        }).then(async (res) => {
+        return fetch(
+          `${window.core?.api.baseApiUrl}/v1/${proxy.path}/${proxy.route}`,
+          {
+            method: 'POST',
+            body: JSON.stringify(args),
+            headers: { contentType: 'application/json' },
+          }
+        ).then(async (res) => {
          try {
            if (proxy.path === 'fs') {
              const text = await res.text()
@ -38,6 +41,6 @@ export const restAPI = {
  }, {}),
  openExternalUrl,
  // Jan Server URL
-  baseApiUrl: API_BASE_URL,
+  baseApiUrl: process.env.API_BASE_URL ?? API_BASE_URL,
  pollingInterval: 5000,
 }