From 6aae985a5564de51fc9078dea80a2042dcefe7f9 Mon Sep 17 00:00:00 2001
From: Louis <133622055+louis-jan@users.noreply.github.com>
Date: Tue, 12 Sep 2023 16:29:26 +0700
Subject: [PATCH] re-#143: use OpenAI decoder and mutate final result from
 client (#164)

* chore: use OpenAI parser

* chore: access host's services

* chore: take out llm service - GGUF model for the latest llama.cpp support
---
 conf/sample.env_web-client                    |  3 +-
 docker-compose.yml                            | 14 ---
 run.sh                                        |  4 +-
 web-client/app/_components/ChatBody/index.tsx |  3 +
 .../_components/StreamTextMessage/index.tsx   | 86 ++++++++++++-------
 web-client/app/api/openai/route.ts            | 26 ++++++
 6 files changed, 87 insertions(+), 49 deletions(-)
 create mode 100644 web-client/app/api/openai/route.ts

diff --git a/conf/sample.env_web-client b/conf/sample.env_web-client
index 897feeb36..35fc6972c 100644
--- a/conf/sample.env_web-client
+++ b/conf/sample.env_web-client
@@ -5,7 +5,8 @@ NEXT_PUBLIC_DOWNLOAD_APP_IOS=#
 NEXT_PUBLIC_DOWNLOAD_APP_ANDROID=#
 NEXT_PUBLIC_GRAPHQL_ENGINE_URL=http://localhost:8080/v1/graphql
 NEXT_PUBLIC_GRAPHQL_ENGINE_WEB_SOCKET_URL=ws://localhost:8080/v1/graphql
-NEXT_PUBLIC_OPENAPI_ENDPOINT=http://localhost:8000/v1/completions
+OPENAPI_ENDPOINT=http://host.docker.internal:8000/v1
+OPENAPI_KEY=openapikey
 KEYCLOAK_CLIENT_ID=hasura
 KEYCLOAK_CLIENT_SECRET=oMtCPAV7diKpE564SBspgKj4HqlKM4Hy
 AUTH_ISSUER=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID

diff --git a/docker-compose.yml b/docker-compose.yml
index 092e31e91..2a0c2bad7 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -112,20 +112,6 @@ services:
       jan_community:
         ipv4_address: 172.20.0.15
 
-  llm:
-    image: ghcr.io/abetlen/llama-cpp-python@sha256:b6d21ff8c4d9baad65e1fa741a0f8c898d68735fff3f3cd777e3f0c6a1839dd4
-    volumes:
-      - ./jan-inference/llm/models:/models
-    ports:
-      - 8000:8000
-    environment:
-      MODEL: /models/${LLM_MODEL_FILE}
-      PYTHONUNBUFFERED: 1
-    restart: on-failure
-    networks:
-      jan_community:
-        ipv4_address: 172.20.0.18
-
 networks:
   jan_community:
     driver: bridge

diff --git a/run.sh b/run.sh
index 043b0c1cb..cc9fe8c44 100755
--- a/run.sh
+++ b/run.sh
@@ -124,10 +124,10 @@ progress 'cp -f sample.env .env' "Prepare .env file" $((step++))
 
 ###
 ### Download Model
-if [ -f "jan-inference/llm/models/llama-2-7b-chat.ggmlv3.q4_1.bin" ]; then
+if [ -f "jan-inference/llm/models/llama-2-7b.Q4_K_S.gguf" ]; then
   progress '' "Llama model - Installed" $((step++))
 else
-  progress 'wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin -P jan-inference/llm/models' "Download Llama model" $((step++))
+  progress 'wget https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_K_S.gguf -P jan-inference/llm/models' "Download Llama model" $((step++))
 fi
 
 ###

diff --git a/web-client/app/_components/ChatBody/index.tsx b/web-client/app/_components/ChatBody/index.tsx
index 95768cd19..5a7928684 100644
--- a/web-client/app/_components/ChatBody/index.tsx
+++ b/web-client/app/_components/ChatBody/index.tsx
@@ -128,6 +128,7 @@ export const ChatBody: React.FC = observer(({ onPromptSelected }) => {
   const renderItem = (
     index: number,
     {
+      id,
      messageType,
       senderAvatarUrl,
       senderName,
@@ -172,9 +173,11 @@
         ) : (
 
         );
       default:
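A note on the two infra changes above: the endpoint variables lose their NEXT_PUBLIC_ prefix, so Next.js no longer inlines them into the browser bundle, and only server-side code such as the new route handler added below can read them. With the llm compose service gone, http://host.docker.internal:8000/v1 points the web client's container at an OpenAI-compatible llama.cpp server expected to run on the host. A minimal sketch of how the values are consumed (not part of the patch; the comments echo the sample defaults):

    // Server-only: without NEXT_PUBLIC_, these values never reach the client bundle.
    const apiBase = process.env.OPENAPI_ENDPOINT; // http://host.docker.internal:8000/v1
    const apiKey = process.env.OPENAPI_KEY; // sample value: openapikey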
diff --git a/web-client/app/_components/StreamTextMessage/index.tsx b/web-client/app/_components/StreamTextMessage/index.tsx
index 4da7d9857..f39b06052 100644
--- a/web-client/app/_components/StreamTextMessage/index.tsx
+++ b/web-client/app/_components/StreamTextMessage/index.tsx
@@ -1,9 +1,16 @@
-import React from "react";
+import React, { useEffect } from "react";
 import { displayDate } from "@/_utils/datetime";
 import { useStore } from "@/_models/RootStore";
-import { StreamingText, StreamingTextURL, useTextBuffer } from "nextjs-openai";
-import { MessageSenderType } from "@/_models/ChatMessage";
+import { StreamingText, useTextBuffer } from "nextjs-openai";
+import { MessageSenderType, MessageStatus } from "@/_models/ChatMessage";
 import { Role } from "@/_models/History";
+import { useMutation } from "@apollo/client";
+import { OpenAI } from "openai-streams";
+import {
+  UpdateMessageDocument,
+  UpdateMessageMutation,
+  UpdateMessageMutationVariables,
+} from "@/graphql";
 
 type Props = {
   id?: string;
@@ -14,6 +21,7 @@
 };
 
 const StreamTextMessage: React.FC<Props> = ({
+  id,
   senderName,
   createdAt,
   avatarUrl = "",
@@ -21,43 +29,59 @@ const StreamTextMessage: React.FC<Props> = ({
   const [data, setData] = React.useState();
   const { historyStore } = useStore();
   const conversation = historyStore?.getActiveConversation();
+  const [updateMessage] = useMutation<UpdateMessageMutation, UpdateMessageMutationVariables>(
+    UpdateMessageDocument
+  );
 
   React.useEffect(() => {
-    const messages = conversation?.chatMessages.slice(-5).map((e) => ({
-      role:
-        e.messageSenderType === MessageSenderType.User
-          ? Role.User
-          : Role.Assistant,
-      content: e.text,
-    }));
+    if (
+      !conversation ||
+      conversation.chatMessages.findIndex((e) => e.id === id) !==
+        conversation.chatMessages.length - 1
+    ) {
+      return;
+    }
+    const messages = conversation?.chatMessages
+      .slice(-10)
+      .filter((e) => e.id !== id)
+      .map((e) => ({
+        role:
+          e.messageSenderType === MessageSenderType.User
+            ? Role.User
+            : Role.Assistant,
+        content: e.text,
+      }));
     setData({
       messages,
-      stream: true,
-      model: "gpt-3.5-turbo",
-      max_tokens: 500,
     });
   }, [conversation]);
 
-  const { buffer, refresh, cancel } = useTextBuffer({
-    url: `${process.env.NEXT_PUBLIC_OPENAPI_ENDPOINT}`,
-    throttle: 100,
+  const { buffer, done } = useTextBuffer({
+    url: `api/openai`,
     data,
-
-    options: {
-      headers: {
-        "Content-Type": "application/json",
-      },
-    },
   });
 
-  const parsedBuffer = (buffer: String) => {
-    try {
-      const json = buffer.replace("data: ", "");
-      return JSON.parse(json).choices[0].text;
-    } catch (e) {
-      return "";
+  useEffect(() => {
+    if (done) {
+      // mutate result
+      const variables: UpdateMessageMutationVariables = {
+        id: id,
+        data: {
+          content: buffer.join(""),
+          status: MessageStatus.Ready,
+        },
+      };
+      updateMessage({
+        variables,
+      });
     }
-  };
+  }, [done]);
+
+  useEffect(() => {
+    if (buffer.length > 0 && conversation?.isWaitingForModelResponse) {
+      historyStore.finishActiveConversationWaiting();
+    }
+  }, [buffer]);
 
   return data ? (
     <div
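The hunk above rewires the client side: useTextBuffer now POSTs `data` (up to the last ten messages, excluding the one being generated) to the internal api/openai route instead of hitting the completions endpoint directly, and the new `done` flag drives a GraphQL mutation that persists the finished text. A simplified sketch of that contract, with types narrowed for illustration:

    // buffer collects the streamed fragments; done flips true when the stream closes.
    const { buffer, done } = useTextBuffer({ url: "api/openai", data });
    // Once done, the full reply is buffer.join(""), written back through the
    // UpdateMessage mutation with status MessageStatus.Ready.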
@@ -78,9 +102,7 @@ const StreamTextMessage: React.FC<Props> = ({
-            <StreamingText
-              buffer={buffer.map((b) =>
-                parsedBuffer(b))}
-            >
+            <StreamingText buffer={buffer}>
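Because decoding now happens behind the API route, the render no longer maps chunks through parsedBuffer; StreamingText receives the raw buffer. An illustrative before/after of a single chunk (assumed shapes, for orientation only):

    // Before: the client decoded raw SSE lines itself, e.g.
    //   data: {"choices":[{"text":"Hello"}]}
    // After: openai-streams decodes the upstream SSE on the server, so the
    // client receives plain text tokens, e.g. "Hello".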
diff --git a/web-client/app/api/openai/route.ts b/web-client/app/api/openai/route.ts
new file mode 100644
index 000000000..4ed892ccb
--- /dev/null
+++ b/web-client/app/api/openai/route.ts
@@ -0,0 +1,26 @@
+import { OpenAI } from "openai-streams";
+
+export async function POST(req: Request) {
+  const { messages } = await req.json();
+  if (!messages) {
+    return new Response(null, {
+      status: 400,
+      statusText: "Did not include `messages` parameter",
+    });
+  }
+  const completionsStream = await OpenAI(
+    "chat",
+    {
+      model: "gpt-3.5-turbo",
+      stream: true,
+      messages,
+      max_tokens: 500,
+    },
+    {
+      apiBase: process.env.OPENAPI_ENDPOINT,
+      apiKey: process.env.OPENAPI_KEY,
+    }
+  );
+
+  return new Response(completionsStream);
+}
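The handler above is the whole server side: OpenAI("chat", ...) from openai-streams consumes the upstream SSE stream, and the decoded tokens come back as a ReadableStream that Response forwards to the browser unchanged. One way to smoke-test the route once the dev server is running, a sketch that assumes localhost:3000 and an async context (Node 18+ fetch):

    const res = await fetch("http://localhost:3000/api/openai", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ messages: [{ role: "user", content: "Hello!" }] }),
    });
    // Read the plain-text token stream as it arrives.
    const reader = res.body!.getReader();
    const decoder = new TextDecoder();
    for (;;) {
      const { value, done } = await reader.read();
      if (done) break;
      process.stdout.write(decoder.decode(value));
    }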