re-#143: use OpenAI decoder and mutate final result from client (#164)

* chore: use the OpenAI parser

* chore: access the host's services

* chore: take out the llm service; use a GGUF model for the latest llama.cpp support

In short: the web client now streams chat completions through a local Next.js api/openai route built on openai-streams, and once streaming finishes it writes the final message back over GraphQL from the client; the llama.cpp server is expected to run on the Docker host instead of in its own Compose container.
Committed by Louis on 2023-09-12 16:29:26 +07:00 via GitHub
parent 83d2e34bd7
commit 6aae985a55
6 changed files with 87 additions and 49 deletions

sample.env

@@ -5,7 +5,8 @@ NEXT_PUBLIC_DOWNLOAD_APP_IOS=#
 NEXT_PUBLIC_DOWNLOAD_APP_ANDROID=#
 NEXT_PUBLIC_GRAPHQL_ENGINE_URL=http://localhost:8080/v1/graphql
 NEXT_PUBLIC_GRAPHQL_ENGINE_WEB_SOCKET_URL=ws://localhost:8080/v1/graphql
-NEXT_PUBLIC_OPENAPI_ENDPOINT=http://localhost:8000/v1/completions
+OPENAPI_ENDPOINT=http://host.docker.internal:8000/v1
+OPENAPI_KEY=openapikey
 KEYCLOAK_CLIENT_ID=hasura
 KEYCLOAK_CLIENT_SECRET=oMtCPAV7diKpE564SBspgKj4HqlKM4Hy
 AUTH_ISSUER=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID
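Since the endpoint now targets host.docker.internal, the containers expect an OpenAI-compatible server listening on port 8000 on the Docker host rather than inside Compose. A minimal sketch of starting one with llama-cpp-python on the host; the exact invocation is an assumption, not part of this commit:

  # Sketch: serve the GGUF model from the host on port 8000 using
  # llama-cpp-python's built-in OpenAI-compatible server; the model path
  # matches run.sh's download target.
  python3 -m llama_cpp.server --model jan-inference/llm/models/llama-2-7b.Q4_K_S.gguf --port 8000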

docker-compose.yml

@@ -112,20 +112,6 @@ services:
       jan_community:
         ipv4_address: 172.20.0.15
 
-  llm:
-    image: ghcr.io/abetlen/llama-cpp-python@sha256:b6d21ff8c4d9baad65e1fa741a0f8c898d68735fff3f3cd777e3f0c6a1839dd4
-    volumes:
-      - ./jan-inference/llm/models:/models
-    ports:
-      - 8000:8000
-    environment:
-      MODEL: /models/${LLM_MODEL_FILE}
-      PYTHONUNBUFFERED: 1
-    restart: on-failure
-    networks:
-      jan_community:
-        ipv4_address: 172.20.0.18
-
 networks:
   jan_community:
     driver: bridge
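One caveat with the host.docker.internal approach: that name resolves out of the box on Docker Desktop (macOS/Windows) but not on stock Linux. A hedged sketch of what a service in this compose file could add to map it, assuming Docker Engine 20.10+; this entry is not part of the commit:

    # Sketch: make host.docker.internal resolve to the host gateway on Linux.
    extra_hosts:
      - "host.docker.internal:host-gateway"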

run.sh

@@ -124,10 +124,10 @@ progress 'cp -f sample.env .env' "Prepare .env file" $((step++))
 
 ###
 ### Download Model
-if [ -f "jan-inference/llm/models/llama-2-7b-chat.ggmlv3.q4_1.bin" ]; then
+if [ -f "jan-inference/llm/models/llama-2-7b.Q4_K_S.gguf" ]; then
   progress '' "Llama model - Installed" $((step++))
 else
-  progress 'wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin -P jan-inference/llm/models' "Download Llama model" $((step++))
+  progress 'wget https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_K_S.gguf -P jan-inference/llm/models' "Download Llama model" $((step++))
 fi
 
 ###

ChatBody component

@@ -128,6 +128,7 @@ export const ChatBody: React.FC<Props> = observer(({ onPromptSelected }) => {
 const renderItem = (
   index: number,
   {
+    id,
     messageType,
     senderAvatarUrl,
     senderName,
@@ -172,9 +173,11 @@
       ) : (
         <StreamTextMessage
           key={index}
+          id={id}
           avatarUrl={senderAvatarUrl ?? "/icons/app_icon.svg"}
           senderName={senderName}
           createdAt={createdAt}
+          text={text}
         />
       );
     default:

StreamTextMessage component

@@ -1,9 +1,16 @@
-import React from "react";
+import React, { useEffect } from "react";
 import { displayDate } from "@/_utils/datetime";
 import { useStore } from "@/_models/RootStore";
-import { StreamingText, StreamingTextURL, useTextBuffer } from "nextjs-openai";
-import { MessageSenderType } from "@/_models/ChatMessage";
+import { StreamingText, useTextBuffer } from "nextjs-openai";
+import { MessageSenderType, MessageStatus } from "@/_models/ChatMessage";
 import { Role } from "@/_models/History";
+import { useMutation } from "@apollo/client";
+import { OpenAI } from "openai-streams";
+import {
+  UpdateMessageDocument,
+  UpdateMessageMutation,
+  UpdateMessageMutationVariables,
+} from "@/graphql";
 
 type Props = {
   id?: string;
@@ -14,6 +21,7 @@ type Props = {
 };
 
 const StreamTextMessage: React.FC<Props> = ({
+  id,
   senderName,
   createdAt,
   avatarUrl = "",
@@ -21,43 +29,59 @@
   const [data, setData] = React.useState<any | undefined>();
   const { historyStore } = useStore();
   const conversation = historyStore?.getActiveConversation();
+  const [updateMessage] = useMutation<UpdateMessageMutation>(
+    UpdateMessageDocument
+  );
 
   React.useEffect(() => {
-    const messages = conversation?.chatMessages.slice(-5).map((e) => ({
-      role:
-        e.messageSenderType === MessageSenderType.User
-          ? Role.User
-          : Role.Assistant,
-      content: e.text,
-    }));
+    if (
+      !conversation ||
+      conversation.chatMessages.findIndex((e) => e.id === id) !==
+        conversation.chatMessages.length - 1
+    ) {
+      return;
+    }
+    const messages = conversation?.chatMessages
+      .slice(-10)
+      .filter((e) => e.id !== id)
+      .map((e) => ({
+        role:
+          e.messageSenderType === MessageSenderType.User
+            ? Role.User
+            : Role.Assistant,
+        content: e.text,
+      }));
     setData({
       messages,
-      stream: true,
-      model: "gpt-3.5-turbo",
-      max_tokens: 500,
     });
   }, [conversation]);
 
-  const { buffer, refresh, cancel } = useTextBuffer({
-    url: `${process.env.NEXT_PUBLIC_OPENAPI_ENDPOINT}`,
+  const { buffer, done } = useTextBuffer({
+    url: `api/openai`,
+    throttle: 100,
     data,
-    options: {
-      headers: {
-        "Content-Type": "application/json",
-      },
-    },
   });
 
-  const parsedBuffer = (buffer: String) => {
-    try {
-      const json = buffer.replace("data: ", "");
-      return JSON.parse(json).choices[0].text;
-    } catch (e) {
-      return "";
-    }
-  };
+  useEffect(() => {
+    if (done) {
+      // mutate result
+      const variables: UpdateMessageMutationVariables = {
+        id: id,
+        data: {
+          content: buffer.join(""),
+          status: MessageStatus.Ready,
+        },
+      };
+      updateMessage({
+        variables,
+      });
+    }
+  }, [done]);
+
+  useEffect(() => {
+    if (buffer.length > 0 && conversation?.isWaitingForModelResponse) {
+      historyStore.finishActiveConversationWaiting();
+    }
+  }, [buffer]);
 
   return data ? (
     <div className="flex items-start gap-2">
@@ -78,9 +102,7 @@ const StreamTextMessage: React.FC<Props> = ({
         </div>
       </div>
       <div className="leading-[20px] whitespace-break-spaces text-[14px] font-normal dark:text-[#d1d5db]">
-        <StreamingText
-          buffer={buffer.map((b) => parsedBuffer(b))}
-        ></StreamingText>
+        <StreamingText buffer={buffer} fade={100} />
       </div>
     </div>
   </div>
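The final write-back relies on UpdateMessageDocument, which is generated from the project's GraphQL schema rather than shown in this diff. For orientation only, a plausible shape of that mutation against a Hasura-style messages table; the table and field names below are guesses, not taken from this commit:

  mutation UpdateMessage($id: uuid!, $data: messages_set_input!) {
    # Hypothetical: persist the fully streamed content and mark the message ready.
    update_messages_by_pk(pk_columns: { id: $id }, _set: $data) {
      id
    }
  }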

api/openai route handler (new file)

@@ -0,0 +1,26 @@
+import { OpenAI } from "openai-streams";
+
+export async function POST(req: Request) {
+  const { messages } = await req.json();
+  if (!messages) {
+    return new Response(null, {
+      status: 400,
+      statusText: "Did not include `messages` parameter",
+    });
+  }
+
+  const completionsStream = await OpenAI(
+    "chat",
+    {
+      model: "gpt-3.5-turbo",
+      stream: true,
+      messages,
+      max_tokens: 500,
+    },
+    {
+      apiBase: process.env.OPENAPI_ENDPOINT,
+      apiKey: process.env.OPENAPI_KEY,
+    }
+  );
+
+  return new Response(completionsStream);
+}
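A quick way to smoke-test the new route from outside the React component — a sketch that assumes the Next.js app runs on localhost:3000 and a model server answers at OPENAPI_ENDPOINT; none of this is part of the commit:

  // Hypothetical smoke test (Node 18+, ESM): POST a chat history and print
  // the streamed tokens as they arrive.
  const res = await fetch("http://localhost:3000/api/openai", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ messages: [{ role: "user", content: "Hello!" }] }),
  });

  // openai-streams decodes the SSE frames server-side, so the response body
  // is plain text chunks rather than "data: {...}" lines.
  const reader = res.body!.getReader();
  const decoder = new TextDecoder();
  for (;;) {
    const { value, done } = await reader.read();
    if (done) break;
    process.stdout.write(decoder.decode(value));
  }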