Merge pull request #159 from janhq/fix/#143-direct-communication-between-client-llm

fix: #143 - Direct communication between client and llm inference service
This commit is contained in:
Daniel 2023-09-12 15:09:59 +08:00 committed by GitHub
commit 83d2e34bd7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 57 additions and 318 deletions

View File

@ -84,14 +84,3 @@ delete_permissions:
user_id: user_id:
_eq: X-Hasura-User-Id _eq: X-Hasura-User-Id
comment: "" comment: ""
# Hasura event trigger (removed by this commit): fired a webhook on every
# message insert so a worker could stream an LLM completion into the row.
event_triggers:
  - name: new_llm_message
    definition:
      enable_manual: false   # only fires on real inserts, not manual invocation
      insert:
        columns: '*'         # any insert triggers the event, regardless of columns set
    retry_conf:
      interval_sec: 10
      num_retries: 0         # best-effort delivery — failed webhook calls are not retried
      timeout_sec: 60
    # Resolved from the Hasura env at runtime; points at the worker's HTTP endpoint.
    webhook: '{{HASURA_EVENTS_HOOK_URL}}'

View File

@ -1,16 +0,0 @@
# Dockerfile
# Dev-only image for the Cloudflare worker: runs `wrangler dev` directly.
# alpine does not work with wrangler
FROM node
RUN mkdir -p /worker
WORKDIR /worker
# Install the CLI globally; project sources are bind-mounted over /worker in compose.
RUN npm install -g wrangler
COPY . /worker
# wrangler dev's default local port.
EXPOSE 8787
CMD ["wrangler", "dev"]

View File

@ -1,196 +0,0 @@
/** Worker bindings/environment variables (populated from wrangler.toml [vars]). */
export interface Env {
  /** Admin secret sent as `x-hasura-admin-secret` on GraphQL mutations. */
  HASURA_ADMIN_API_KEY: string;
  /** Chat-completions URL of the LLM inference service (fetched with SSE streaming). */
  LLM_INFERENCE_ENDPOINT: string;
  /** Sent as the `api-key` header to the inference service. */
  INFERENCE_API_KEY: string;
  /** Base URL of the Hasura GraphQL engine; `/v1/graphql` is appended for requests. */
  HASURA_GRAPHQL_ENGINE_ENDPOINT: string;
}
/** Cloudflare Worker entry point: delegate every request to handleRequest. */
export default {
  fetch: (request: Request, env: Env) => handleRequest(env, request),
};
/**
 * Hasura event-trigger webhook handler.
 *
 * Receives the insert event for a `messages` row, streams a chat completion
 * from the LLM service (SSE, `stream: true`), and progressively writes the
 * partial answer back into Hasura (throttled). When the stream ends, the row
 * is marked `ready`, its prompt cache cleared, and the parent conversation's
 * last message is updated.
 *
 * @param env     Worker bindings (endpoints and secrets).
 * @param request Hasura event payload; the handler reads
 *                event.data.new.{id, content, prompt_cache, status, conversation_id}.
 * @returns A JSON Response acknowledging the event (200), or 502 when the
 *          LLM service returns no body.
 */
async function handleRequest(env: Env, request: Request) {
  const apiurl = env.LLM_INFERENCE_ENDPOINT;
  const requestBody = await request.json();

  // ---- shared throttle state (captured by every wrapper created below) ----
  let lastCallTime = 0;
  // FIX: was `any`; setTimeout's return type is platform-dependent.
  let timeoutId: ReturnType<typeof setTimeout> | undefined;
  let done = true; // true while no throttled call is in flight

  // Run `fn` at most once per `delay` ms. An early call is deferred with
  // setTimeout, replacing any previously deferred call.
  function throttle(fn: () => void, delay: number) {
    return async function () {
      const now = new Date().getTime();
      const timeSinceLastCall = now - lastCallTime;
      if (timeSinceLastCall >= delay && done) {
        lastCallTime = now;
        done = false;
        await fn();
        done = true;
      } else {
        clearTimeout(timeoutId);
        timeoutId = setTimeout(async () => {
          lastCallTime = now;
          done = false;
          await fn();
          done = true;
        }, delay - timeSinceLastCall);
      }
    };
  }

  const messageBody = {
    id: requestBody.event.data.new.id,
    content: requestBody.event.data.new.content,
    messages: requestBody.event.data.new.prompt_cache,
    status: requestBody.event.data.new.status,
  };

  // Only rows inserted in "pending" state need inference; ack everything else.
  if (messageBody.status !== "pending") {
    return new Response(JSON.stringify({ status: "success" }), {
      status: 200,
      statusText: "success",
    });
  }

  const llmRequestBody = {
    messages: messageBody.messages,
    stream: true,
    model: "gpt-3.5-turbo",
    max_tokens: 500,
  };
  const init = {
    body: JSON.stringify(llmRequestBody),
    headers: {
      "Content-Type": "application/json",
      Accept: "text/event-stream",
      // NOTE(review): this is not a valid Authorization value — it looks like
      // a CORS header pasted by mistake. Kept as-is to preserve behavior;
      // real auth is carried by the `api-key` header below. TODO confirm.
      Authorization: "Access-Control-Allow-Origin: *",
      "api-key": env.INFERENCE_API_KEY,
    },
    method: "POST",
  };

  return fetch(apiurl, init)
    .then((res) => res.body?.getReader())
    .then(async (reader) => {
      if (!reader) {
        console.error("Error: fail to read data from response");
        // FIX: previously returned undefined here, which is not a valid
        // return from a Worker fetch path and makes the runtime error out.
        return new Response(
          JSON.stringify({ status: "error", message: "no response body from LLM service" }),
          { status: 502, statusText: "Bad Gateway" }
        );
      }
      let answer = "";
      let cachedChunk = "";
      // FIX: hoisted out of the loop — was re-created for every chunk.
      const textDecoder = new TextDecoder("utf-8");
      while (true) {
        // FIX: renamed from `done` — the original destructure shadowed the
        // throttle's `done` flag, which is confusing even though harmless.
        const { done: streamDone, value } = await reader.read();
        if (streamDone) {
          break;
        }
        const chunk = textDecoder.decode(value);
        cachedChunk += chunk;
        // Buffer until at least one complete `data: {...}` SSE record arrived.
        const matched = cachedChunk.match(/data: {(.*)}/g);
        if (!matched) {
          continue;
        }
        let deltaText = "";
        for (const line of cachedChunk.split("\n")) {
          const trimmedLine = line.trim();
          if (!trimmedLine || trimmedLine === "data: [DONE]") {
            continue;
          }
          const json = trimmedLine.replace("data: ", "");
          try {
            const obj = JSON.parse(json);
            const content = obj.choices[0].delta.content;
            if (content) deltaText = deltaText.concat(content);
          } catch (e) {
            // Incomplete/garbled SSE line — skip it; a later chunk may finish it.
          }
        }
        cachedChunk = "";
        answer = answer + deltaText;
        const variables = {
          id: messageBody.id,
          data: {
            content: answer.trim(),
          },
        };
        // Throttled partial-content update so Hasura isn't hit on every token.
        throttle(async () => {
          await fetch(env.HASURA_GRAPHQL_ENGINE_ENDPOINT + "/v1/graphql", {
            method: "POST",
            body: JSON.stringify({ query: updateMessageQuery, variables }),
            headers: {
              "Content-Type": "application/json",
              "x-hasura-admin-secret": env.HASURA_ADMIN_API_KEY,
            },
          })
            .catch((error) => {
              console.error(error);
            })
            .finally(() => console.log("++-- request sent"));
        }, 300)();
      }

      // Stream finished: mark the row ready and drop its prompt cache.
      // NOTE(review): a throttled update scheduled via setTimeout above may
      // still fire AFTER this final mutation and overwrite `content` — the
      // pending timeout is never cancelled here. TODO confirm/fix upstream.
      const variables = {
        id: messageBody.id,
        data: {
          status: "ready",
          prompt_cache: null,
        },
      };
      await fetch(env.HASURA_GRAPHQL_ENGINE_ENDPOINT + "/v1/graphql", {
        method: "POST",
        body: JSON.stringify({ query: updateMessageQuery, variables }),
        headers: {
          "Content-Type": "application/json",
          "x-hasura-admin-secret": env.HASURA_ADMIN_API_KEY,
        },
      }).catch((error) => {
        console.error(error);
      });

      // Mirror the final answer onto the conversation (last_text_message).
      const convUpdateVars = {
        id: requestBody.event.data.new.conversation_id,
        content: answer,
      };
      await fetch(env.HASURA_GRAPHQL_ENGINE_ENDPOINT + "/v1/graphql", {
        method: "POST",
        body: JSON.stringify({ query: updateConversationquery, variables: convUpdateVars }),
        headers: {
          "Content-Type": "application/json",
          "x-hasura-admin-secret": env.HASURA_ADMIN_API_KEY,
        },
      }).catch((error) => {
        console.error(error);
      });

      return new Response(JSON.stringify({ status: "success" }), {
        status: 200,
        statusText: "success",
      });
    });
}
// GraphQL mutation used for both the throttled partial-content updates and
// the final status/prompt_cache update ($data carries whichever fields change).
const updateMessageQuery = `
mutation chatCompletions($id: uuid = "", $data: messages_set_input) {
  update_messages_by_pk(pk_columns: {id: $id}, _set: $data) {
    id
    content
  }
}
`;
// GraphQL mutation that mirrors the finished answer onto the parent
// conversation's last_text_message (used once, after the stream completes).
// NOTE(review): lower-case "query" in the name is a typo, but renaming would
// break the reference in handleRequest — left as-is.
const updateConversationquery = `
mutation updateConversation($id: uuid = "", $content: String = "") {
  update_conversations_by_pk(pk_columns: {id: $id}, _set: {last_text_message: $content}) {
    id
  }
}
`; // FIX: trailing semicolon added for consistency with updateMessageQuery

View File

@ -1,11 +0,0 @@
# wrangler.toml — local dev configuration for the Hasura-events worker.
# NOTE(review): "cloudlfare" is a typo for "cloudflare"; the worker name is
# only cosmetic locally, but verify nothing references it before renaming.
name = "cloudlfare_worker"
main = "worker.ts"
compatibility_date = "2023-06-08"
workers_dev = true

# Values below match the docker-compose network (graphql-engine / llm hosts);
# secrets are blank here and expected to be filled per environment.
[vars]
HASURA_GRAPHQL_ENGINE_ENDPOINT = "http://graphql-engine:8080"
HASURA_ADMIN_API_KEY = "myadminsecretkey"
LLM_INFERENCE_ENDPOINT="http://llm:8000/v1/chat/completions"
INFERENCE_API_KEY=""
PROJECT_ID = ""

View File

@ -5,6 +5,7 @@ NEXT_PUBLIC_DOWNLOAD_APP_IOS=#
NEXT_PUBLIC_DOWNLOAD_APP_ANDROID=# NEXT_PUBLIC_DOWNLOAD_APP_ANDROID=#
NEXT_PUBLIC_GRAPHQL_ENGINE_URL=http://localhost:8080/v1/graphql NEXT_PUBLIC_GRAPHQL_ENGINE_URL=http://localhost:8080/v1/graphql
NEXT_PUBLIC_GRAPHQL_ENGINE_WEB_SOCKET_URL=ws://localhost:8080/v1/graphql NEXT_PUBLIC_GRAPHQL_ENGINE_WEB_SOCKET_URL=ws://localhost:8080/v1/graphql
NEXT_PUBLIC_OPENAPI_ENDPOINT=http://localhost:8000/v1/completions
KEYCLOAK_CLIENT_ID=hasura KEYCLOAK_CLIENT_ID=hasura
KEYCLOAK_CLIENT_SECRET=oMtCPAV7diKpE564SBspgKj4HqlKM4Hy KEYCLOAK_CLIENT_SECRET=oMtCPAV7diKpE564SBspgKj4HqlKM4Hy
AUTH_ISSUER=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID AUTH_ISSUER=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID
@ -12,4 +13,3 @@ NEXTAUTH_URL=http://localhost:3000
NEXTAUTH_SECRET=my-secret NEXTAUTH_SECRET=my-secret
END_SESSION_URL=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID/protocol/openid-connect/logout END_SESSION_URL=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID/protocol/openid-connect/logout
REFRESH_TOKEN_URL=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID/protocol/openid-connect/token REFRESH_TOKEN_URL=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID/protocol/openid-connect/token
HASURA_ADMIN_TOKEN=myadminsecretkey

View File

@ -68,21 +68,6 @@ services:
jan_community: jan_community:
ipv4_address: 172.20.0.12 ipv4_address: 172.20.0.12
worker:
build:
context: ./app-backend/worker
dockerfile: ./Dockerfile
restart: always
environment:
- "NODE_ENV=development"
volumes:
- ./app-backend/worker:/worker
ports:
- 8787:8787
networks:
jan_community:
ipv4_address: 172.20.0.13
data-connector-agent: data-connector-agent:
image: hasura/graphql-data-connector:v2.31.0 image: hasura/graphql-data-connector:v2.31.0
restart: always restart: always

View File

@ -33,8 +33,13 @@ For using our complete solution, check [this](https://github.com/janhq/jan)
``` ```
yarn dev yarn dev
``` ```
4. **Regenerate GraphQL:**
```
HASURA_ADMIN_TOKEN="[hasura_admin_secret_key]" yarn generate
```
5. **Access Jan Web:**
Open your web browser and navigate to `http://localhost:3000` to access the Jan Web application. Open your web browser and navigate to `http://localhost:3000` to access the Jan Web application.

View File

@ -128,7 +128,6 @@ export const ChatBody: React.FC<Props> = observer(({ onPromptSelected }) => {
const renderItem = ( const renderItem = (
index: number, index: number,
{ {
id,
messageType, messageType,
senderAvatarUrl, senderAvatarUrl,
senderName, senderName,
@ -173,11 +172,9 @@ const renderItem = (
) : ( ) : (
<StreamTextMessage <StreamTextMessage
key={index} key={index}
id={id}
avatarUrl={senderAvatarUrl ?? "/icons/app_icon.svg"} avatarUrl={senderAvatarUrl ?? "/icons/app_icon.svg"}
senderName={senderName} senderName={senderName}
createdAt={createdAt} createdAt={createdAt}
text={text}
/> />
); );
default: default:

View File

@ -1,13 +1,9 @@
import React, { useEffect } from "react"; import React from "react";
import { displayDate } from "@/_utils/datetime"; import { displayDate } from "@/_utils/datetime";
import { TextCode } from "../TextCode";
import { getMessageCode } from "@/_utils/message";
import { useSubscription } from "@apollo/client";
import {
SubscribeMessageDocument,
SubscribeMessageSubscription,
} from "@/graphql";
import { useStore } from "@/_models/RootStore"; import { useStore } from "@/_models/RootStore";
import { StreamingText, StreamingTextURL, useTextBuffer } from "nextjs-openai";
import { MessageSenderType } from "@/_models/ChatMessage";
import { Role } from "@/_models/History";
type Props = { type Props = {
id?: string; id?: string;
@ -18,54 +14,52 @@ type Props = {
}; };
const StreamTextMessage: React.FC<Props> = ({ const StreamTextMessage: React.FC<Props> = ({
id,
senderName, senderName,
createdAt, createdAt,
avatarUrl = "", avatarUrl = "",
text = "",
}) => { }) => {
const [textMessage, setTextMessage] = React.useState(text); const [data, setData] = React.useState<any | undefined>();
const [completedTyping, setCompletedTyping] = React.useState(false);
const tokenIndex = React.useRef(0);
const { historyStore } = useStore(); const { historyStore } = useStore();
const { data } = useSubscription<SubscribeMessageSubscription>( const conversation = historyStore?.getActiveConversation();
SubscribeMessageDocument,
{ React.useEffect(() => {
variables: { const messages = conversation?.chatMessages.slice(-5).map((e) => ({
id, role:
e.messageSenderType === MessageSenderType.User
? Role.User
: Role.Assistant,
content: e.text,
}));
setData({
messages,
stream: true,
model: "gpt-3.5-turbo",
max_tokens: 500,
});
}, [conversation]);
const { buffer, refresh, cancel } = useTextBuffer({
url: `${process.env.NEXT_PUBLIC_OPENAPI_ENDPOINT}`,
throttle: 100,
data,
options: {
headers: {
"Content-Type": "application/json",
}, },
},
});
const parsedBuffer = (buffer: String) => {
try {
const json = buffer.replace("data: ", "");
return JSON.parse(json).choices[0].text;
} catch (e) {
return "";
} }
); };
useEffect(() => { return data ? (
if (
data?.messages_by_pk?.content &&
data.messages_by_pk.content.length > text.length
) {
historyStore.finishActiveConversationWaiting();
}
}, [data, text]); // eslint-disable-line react-hooks/exhaustive-deps
useEffect(() => {
setCompletedTyping(false);
const stringResponse = data?.messages_by_pk?.content ?? text;
const intervalId = setInterval(() => {
setTextMessage(stringResponse.slice(0, tokenIndex.current));
tokenIndex.current++;
if (tokenIndex.current > stringResponse.length) {
clearInterval(intervalId);
setCompletedTyping(true);
}
}, 20);
return () => clearInterval(intervalId);
}, [data?.messages_by_pk?.content, text]);
return textMessage.length > 0 ? (
<div className="flex items-start gap-2"> <div className="flex items-start gap-2">
<img <img
className="rounded-full" className="rounded-full"
@ -83,20 +77,11 @@ const StreamTextMessage: React.FC<Props> = ({
{displayDate(createdAt)} {displayDate(createdAt)}
</div> </div>
</div> </div>
{textMessage.includes("```") ? ( <div className="leading-[20px] whitespace-break-spaces text-[14px] font-normal dark:text-[#d1d5db]">
getMessageCode(textMessage).map((item, i) => ( <StreamingText
<div className="flex gap-1 flex-col" key={i}> buffer={buffer.map((b) => parsedBuffer(b))}
<p className="leading-[20px] whitespace-break-spaces text-[14px] font-normal dark:text-[#d1d5db]"> ></StreamingText>
{item.text} </div>
</p>
{item.code.trim().length > 0 && <TextCode text={item.code} />}
</div>
))
) : (
<p className="leading-[20px] whitespace-break-spaces text-[14px] font-normal dark:text-[#d1d5db]">
{textMessage}
</p>
)}
</div> </div>
</div> </div>
) : ( ) : (

View File

@ -8,7 +8,6 @@
"start": "next start", "start": "next start",
"lint": "next lint", "lint": "next lint",
"compile": "tsc --noEmit -p . --pretty", "compile": "tsc --noEmit -p . --pretty",
"configure": "op read op://Shared/WebDevelopmentEnv/.env > .env && op read op://Shared/WebDevelopmentFirebaseConfig/firebase_configs.json > app/_services/firebase/firebase_configs.json",
"generate": "graphql-codegen --config codegen.ts", "generate": "graphql-codegen --config codegen.ts",
"watch": "graphql-codegen -w" "watch": "graphql-codegen -w"
}, },
@ -36,6 +35,8 @@
"next": "13.4.10", "next": "13.4.10",
"next-auth": "^4.23.1", "next-auth": "^4.23.1",
"next-themes": "^0.2.1", "next-themes": "^0.2.1",
"nextjs-openai": "^7.2.0",
"openai-streams": "^6.2.0",
"postcss": "8.4.26", "postcss": "8.4.26",
"react": "18.2.0", "react": "18.2.0",
"react-dom": "18.2.0", "react-dom": "18.2.0",