re-#143: use OpenAI decoder and mutate final result from client (#164)

* chore: use OpenAI parser

* chore: access host's services

* chore: take out llm service; use a GGUF model for the latest llama.cpp support
Louis 2023-09-12 16:29:26 +07:00 committed by GitHub
parent 83d2e34bd7
commit 6aae985a55
6 changed files with 87 additions and 49 deletions


@@ -5,7 +5,8 @@ NEXT_PUBLIC_DOWNLOAD_APP_IOS=#
NEXT_PUBLIC_DOWNLOAD_APP_ANDROID=#
NEXT_PUBLIC_GRAPHQL_ENGINE_URL=http://localhost:8080/v1/graphql
NEXT_PUBLIC_GRAPHQL_ENGINE_WEB_SOCKET_URL=ws://localhost:8080/v1/graphql
-NEXT_PUBLIC_OPENAPI_ENDPOINT=http://localhost:8000/v1/completions
+OPENAPI_ENDPOINT=http://host.docker.internal:8000/v1
+OPENAPI_KEY=openapikey
KEYCLOAK_CLIENT_ID=hasura
KEYCLOAK_CLIENT_SECRET=oMtCPAV7diKpE564SBspgKj4HqlKM4Hy
AUTH_ISSUER=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID
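
Note: the endpoint moves from a browser-visible NEXT_PUBLIC_ variable to the server-only pair OPENAPI_ENDPOINT/OPENAPI_KEY, and host.docker.internal lets the containerized web app reach a llama.cpp server running on the Docker host. A minimal connectivity check (illustrative only, not part of this commit), assuming the server exposes the standard OpenAI-compatible GET /models route as llama-cpp-python does:

// check-endpoint.ts (hypothetical helper, Node 18+)
async function checkEndpoint(): Promise<void> {
  const base = process.env.OPENAPI_ENDPOINT; // e.g. http://host.docker.internal:8000/v1
  const res = await fetch(`${base}/models`, {
    headers: { Authorization: `Bearer ${process.env.OPENAPI_KEY}` },
  });
  if (!res.ok) {
    throw new Error(`LLM endpoint unreachable: ${res.status} ${res.statusText}`);
  }
  console.log("available models:", await res.json());
}

checkEndpoint();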


@@ -112,20 +112,6 @@ services:
       jan_community:
         ipv4_address: 172.20.0.15
-  llm:
-    image: ghcr.io/abetlen/llama-cpp-python@sha256:b6d21ff8c4d9baad65e1fa741a0f8c898d68735fff3f3cd777e3f0c6a1839dd4
-    volumes:
-      - ./jan-inference/llm/models:/models
-    ports:
-      - 8000:8000
-    environment:
-      MODEL: /models/${LLM_MODEL_FILE}
-      PYTHONUNBUFFERED: 1
-    restart: on-failure
-    networks:
-      jan_community:
-        ipv4_address: 172.20.0.18
 networks:
   jan_community:
     driver: bridge

run.sh

@@ -124,10 +124,10 @@ progress 'cp -f sample.env .env' "Prepare .env file" $((step++))
###
### Download Model
-if [ -f "jan-inference/llm/models/llama-2-7b-chat.ggmlv3.q4_1.bin" ]; then
+if [ -f "jan-inference/llm/models/llama-2-7b.Q4_K_S.gguf" ]; then
   progress '' "Llama model - Installed" $((step++))
 else
-  progress 'wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin -P jan-inference/llm/models' "Download Llama model" $((step++))
+  progress 'wget https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_K_S.gguf -P jan-inference/llm/models' "Download Llama model" $((step++))
fi
###
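
Note: GGUF supersedes GGML as llama.cpp's model container format, and the new file name encodes its Q4_K_S quantization. A quick post-download sanity check (illustrative only, not part of this commit): every GGUF file starts with the 4-byte ASCII magic "GGUF", so a truncated or HTML-error download can be caught before llama.cpp tries to load it.

// verify-gguf.ts (hypothetical helper)
import { open } from "node:fs/promises";

async function isGguf(path: string): Promise<boolean> {
  const file = await open(path, "r");
  try {
    const buf = Buffer.alloc(4);
    await file.read(buf, 0, 4, 0); // read the first four bytes
    return buf.toString("ascii") === "GGUF";
  } finally {
    await file.close();
  }
}

isGguf("jan-inference/llm/models/llama-2-7b.Q4_K_S.gguf").then(console.log);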


@@ -128,6 +128,7 @@ export const ChatBody: React.FC<Props> = observer(({ onPromptSelected }) => {
   const renderItem = (
     index: number,
     {
+      id,
       messageType,
       senderAvatarUrl,
       senderName,
@@ -172,9 +173,11 @@ const renderItem = (
       ) : (
         <StreamTextMessage
           key={index}
+          id={id}
           avatarUrl={senderAvatarUrl ?? "/icons/app_icon.svg"}
           senderName={senderName}
           createdAt={createdAt}
           text={text}
         />
       );
     default:


@@ -1,9 +1,16 @@
-import React from "react";
+import React, { useEffect } from "react";
 import { displayDate } from "@/_utils/datetime";
 import { useStore } from "@/_models/RootStore";
-import { StreamingText, StreamingTextURL, useTextBuffer } from "nextjs-openai";
-import { MessageSenderType } from "@/_models/ChatMessage";
+import { StreamingText, useTextBuffer } from "nextjs-openai";
+import { MessageSenderType, MessageStatus } from "@/_models/ChatMessage";
 import { Role } from "@/_models/History";
+import { useMutation } from "@apollo/client";
+import { OpenAI } from "openai-streams";
+import {
+  UpdateMessageDocument,
+  UpdateMessageMutation,
+  UpdateMessageMutationVariables,
+} from "@/graphql";

 type Props = {
+  id?: string;
@@ -14,6 +21,7 @@ type Props = {
 };

 const StreamTextMessage: React.FC<Props> = ({
+  id,
   senderName,
   createdAt,
   avatarUrl = "",
@@ -21,9 +29,22 @@ const StreamTextMessage: React.FC<Props> = ({
   const [data, setData] = React.useState<any | undefined>();
   const { historyStore } = useStore();
   const conversation = historyStore?.getActiveConversation();
+  const [updateMessage] = useMutation<UpdateMessageMutation>(
+    UpdateMessageDocument
+  );

   React.useEffect(() => {
-    const messages = conversation?.chatMessages.slice(-5).map((e) => ({
+    if (
+      !conversation ||
+      conversation.chatMessages.findIndex((e) => e.id === id) !==
+        conversation.chatMessages.length - 1
+    ) {
+      return;
+    }
+    const messages = conversation?.chatMessages
+      .slice(-10)
+      .filter((e) => e.id !== id)
+      .map((e) => ({
         role:
           e.messageSenderType === MessageSenderType.User
             ? Role.User
@@ -32,32 +53,35 @@ const StreamTextMessage: React.FC<Props> = ({
       }));
     setData({
       messages,
       stream: true,
       model: "gpt-3.5-turbo",
       max_tokens: 500,
     });
   }, [conversation]);

-  const { buffer, refresh, cancel } = useTextBuffer({
-    url: `${process.env.NEXT_PUBLIC_OPENAPI_ENDPOINT}`,
-    throttle: 100,
+  const { buffer, done } = useTextBuffer({
+    url: `api/openai`,
     data,
     options: {
       headers: {
         "Content-Type": "application/json",
       },
     },
   });

-  const parsedBuffer = (buffer: String) => {
-    try {
-      const json = buffer.replace("data: ", "");
-      return JSON.parse(json).choices[0].text;
-    } catch (e) {
-      return "";
-    }
-  };
+  useEffect(() => {
+    if (done) {
+      // mutate result
+      const variables: UpdateMessageMutationVariables = {
+        id: id,
+        data: {
+          content: buffer.join(""),
+          status: MessageStatus.Ready,
+        },
+      };
+      updateMessage({
+        variables,
+      });
+    }
+  }, [done]);

   useEffect(() => {
     if (buffer.length > 0 && conversation?.isWaitingForModelResponse) {
       historyStore.finishActiveConversationWaiting();
     }
   }, [buffer]);

   return data ? (
     <div className="flex items-start gap-2">
@@ -78,9 +102,7 @@ const StreamTextMessage: React.FC<Props> = ({
         </div>
       </div>
       <div className="leading-[20px] whitespace-break-spaces text-[14px] font-normal dark:text-[#d1d5db]">
-        <StreamingText
-          buffer={buffer.map((b) => parsedBuffer(b))}
-        ></StreamingText>
+        <StreamingText buffer={buffer} fade={100} />
       </div>
     </div>
   </div>

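Note: once useTextBuffer reports done, the component joins the buffered chunks and persists the final text through the updateMessage mutation, so the completed message survives a reload instead of living only in client state. The generated UpdateMessageDocument is outside this diff; assuming Jan's Hasura backend (the GraphQL engine configured in .env above), the source mutation plausibly looks like the sketch below, where the table and input-type names are assumptions:

// update-message.ts (hypothetical; the real document lives in "@/graphql")
import { gql } from "@apollo/client";

export const UpdateMessage = gql`
  mutation UpdateMessage($id: uuid!, $data: messages_set_input!) {
    update_messages_by_pk(pk_columns: { id: $id }, _set: $data) {
      id
      content
      status
    }
  }
`;
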

@@ -0,0 +1,26 @@
+import { OpenAI } from "openai-streams";
+
+export async function POST(req: Request) {
+  const { messages } = await req.json();
+  if (!messages) {
+    return new Response(null, {
+      status: 400,
+      statusText: "Did not include `messages` parameter",
+    });
+  }
+
+  const completionsStream = await OpenAI(
+    "chat",
+    {
+      model: "gpt-3.5-turbo",
+      stream: true,
+      messages,
+      max_tokens: 500,
+    },
+    {
+      apiBase: process.env.OPENAPI_ENDPOINT,
+      apiKey: process.env.OPENAPI_KEY,
+    }
+  );
+
+  return new Response(completionsStream);
+}
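
Note: this new route keeps OPENAPI_KEY and OPENAPI_ENDPOINT server-side, and the OpenAI decoder from openai-streams parses the upstream SSE events into plain token text, which is why the client no longer needs its parsedBuffer workaround. useTextBuffer consumes the stream above, but any fetch-based reader works; an illustrative client (not part of this commit):

// stream-client.ts (hypothetical; roughly what useTextBuffer does internally)
async function streamCompletion(messages: { role: string; content: string }[]) {
  const res = await fetch("/api/openai", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ messages }),
  });
  if (!res.body) throw new Error("no response stream");

  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let text = "";
  for (;;) {
    const { value, done } = await reader.read();
    if (done) break;
    text += decoder.decode(value, { stream: true }); // append the decoded token chunk
  }
  return text;
}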