From 6aae985a5564de51fc9078dea80a2042dcefe7f9 Mon Sep 17 00:00:00 2001
From: Louis <133622055+louis-jan@users.noreply.github.com>
Date: Tue, 12 Sep 2023 16:29:26 +0700
Subject: [PATCH] re-#143: use OpenAI decoder and mutate final result from
 client (#164)

* chore: use OpenAI parser

* chore: access host's services

* chore: take out llm service - GGUF model for the latest llama.cpp support
---
 conf/sample.env_web-client                    |  3 +-
 docker-compose.yml                            | 14 ---
 run.sh                                        |  4 +-
 web-client/app/_components/ChatBody/index.tsx |  3 +
 .../_components/StreamTextMessage/index.tsx   | 86 ++++++++++++-------
 web-client/app/api/openai/route.ts            | 26 ++++++
 6 files changed, 87 insertions(+), 49 deletions(-)
 create mode 100644 web-client/app/api/openai/route.ts

diff --git a/conf/sample.env_web-client b/conf/sample.env_web-client
index 897feeb36..35fc6972c 100644
--- a/conf/sample.env_web-client
+++ b/conf/sample.env_web-client
@@ -5,7 +5,8 @@ NEXT_PUBLIC_DOWNLOAD_APP_IOS=#
 NEXT_PUBLIC_DOWNLOAD_APP_ANDROID=#
 NEXT_PUBLIC_GRAPHQL_ENGINE_URL=http://localhost:8080/v1/graphql
 NEXT_PUBLIC_GRAPHQL_ENGINE_WEB_SOCKET_URL=ws://localhost:8080/v1/graphql
-NEXT_PUBLIC_OPENAPI_ENDPOINT=http://localhost:8000/v1/completions
+OPENAPI_ENDPOINT=http://host.docker.internal:8000/v1
+OPENAPI_KEY=openapikey
 KEYCLOAK_CLIENT_ID=hasura
 KEYCLOAK_CLIENT_SECRET=oMtCPAV7diKpE564SBspgKj4HqlKM4Hy
 AUTH_ISSUER=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID

diff --git a/docker-compose.yml b/docker-compose.yml
index 092e31e91..2a0c2bad7 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -112,20 +112,6 @@ services:
       jan_community:
         ipv4_address: 172.20.0.15
 
-  llm:
-    image: ghcr.io/abetlen/llama-cpp-python@sha256:b6d21ff8c4d9baad65e1fa741a0f8c898d68735fff3f3cd777e3f0c6a1839dd4
-    volumes:
-      - ./jan-inference/llm/models:/models
-    ports:
-      - 8000:8000
-    environment:
-      MODEL: /models/${LLM_MODEL_FILE}
-      PYTHONUNBUFFERED: 1
-    restart: on-failure
-    networks:
-      jan_community:
-        ipv4_address: 172.20.0.18
-
 networks:
   jan_community:
     driver: bridge

diff --git a/run.sh b/run.sh
index 043b0c1cb..cc9fe8c44 100755
--- a/run.sh
+++ b/run.sh
@@ -124,10 +124,10 @@ progress 'cp -f sample.env .env' "Prepare .env file" $((step++))
 
 ###
 ### Download Model
-if [ -f "jan-inference/llm/models/llama-2-7b-chat.ggmlv3.q4_1.bin" ]; then
+if [ -f "jan-inference/llm/models/llama-2-7b.Q4_K_S.gguf" ]; then
   progress '' "Llama model - Installed" $((step++))
 else
-  progress 'wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin -P jan-inference/llm/models' "Download Llama model" $((step++))
+  progress 'wget https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_K_S.gguf -P jan-inference/llm/models' "Download Llama model" $((step++))
 fi
 
 ###

diff --git a/web-client/app/_components/ChatBody/index.tsx b/web-client/app/_components/ChatBody/index.tsx
index 95768cd19..5a7928684 100644
--- a/web-client/app/_components/ChatBody/index.tsx
+++ b/web-client/app/_components/ChatBody/index.tsx
@@ -128,6 +128,7 @@ export const ChatBody: React.FC = observer(({ onPromptSelected }) => {
   const renderItem = (
     index: number,
     {
+      id,
      messageType,
       senderAvatarUrl,
       senderName,
@@ -172,9 +173,11 @@
         ) : (
 
         );
       default:
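A note on the two infra changes above: the endpoint variables lose their NEXT_PUBLIC_ prefix, so Next.js no longer inlines them into the browser bundle, and only server-side code such as the new route handler added below can read them. With the llm compose service gone, http://host.docker.internal:8000/v1 points the web client's container at an OpenAI-compatible llama.cpp server expected to run on the host. A minimal sketch of how the values are consumed (not part of the patch; the comments echo the sample defaults):

    // Server-only: without NEXT_PUBLIC_, these values never reach the client bundle.
    const apiBase = process.env.OPENAPI_ENDPOINT; // http://host.docker.internal:8000/v1
    const apiKey = process.env.OPENAPI_KEY; // sample value: openapikey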
diff --git a/web-client/app/_components/StreamTextMessage/index.tsx b/web-client/app/_components/StreamTextMessage/index.tsx
index 4da7d9857..f39b06052 100644
--- a/web-client/app/_components/StreamTextMessage/index.tsx
+++ b/web-client/app/_components/StreamTextMessage/index.tsx
@@ -1,9 +1,16 @@
-import React from "react";
+import React, { useEffect } from "react";
 import { displayDate } from "@/_utils/datetime";
 import { useStore } from "@/_models/RootStore";
-import { StreamingText, StreamingTextURL, useTextBuffer } from "nextjs-openai";
-import { MessageSenderType } from "@/_models/ChatMessage";
+import { StreamingText, useTextBuffer } from "nextjs-openai";
+import { MessageSenderType, MessageStatus } from "@/_models/ChatMessage";
 import { Role } from "@/_models/History";
+import { useMutation } from "@apollo/client";
+import { OpenAI } from "openai-streams";
+import {
+  UpdateMessageDocument,
+  UpdateMessageMutation,
+  UpdateMessageMutationVariables,
+} from "@/graphql";
 
 type Props = {
   id?: string;
@@ -14,6 +21,7 @@
 };
 
 const StreamTextMessage: React.FC<Props> = ({
+  id,
   senderName,
   createdAt,
   avatarUrl = "",
@@ -21,43 +29,59 @@ const StreamTextMessage: React.FC<Props> = ({
   const [data, setData] = React.useState();
   const { historyStore } = useStore();
   const conversation = historyStore?.getActiveConversation();
+  const [updateMessage] = useMutation<UpdateMessageMutation, UpdateMessageMutationVariables>(
+    UpdateMessageDocument
+  );
 
   React.useEffect(() => {
-    const messages = conversation?.chatMessages.slice(-5).map((e) => ({
-      role:
-        e.messageSenderType === MessageSenderType.User
-          ? Role.User
-          : Role.Assistant,
-      content: e.text,
-    }));
+    if (
+      !conversation ||
+      conversation.chatMessages.findIndex((e) => e.id === id) !==
+        conversation.chatMessages.length - 1
+    ) {
+      return;
+    }
+    const messages = conversation?.chatMessages
+      .slice(-10)
+      .filter((e) => e.id !== id)
+      .map((e) => ({
+        role:
+          e.messageSenderType === MessageSenderType.User
+            ? Role.User
+            : Role.Assistant,
+        content: e.text,
+      }));
     setData({
       messages,
-      stream: true,
-      model: "gpt-3.5-turbo",
-      max_tokens: 500,
     });
   }, [conversation]);
 
-  const { buffer, refresh, cancel } = useTextBuffer({
-    url: `${process.env.NEXT_PUBLIC_OPENAPI_ENDPOINT}`,
-    throttle: 100,
+  const { buffer, done } = useTextBuffer({
+    url: `api/openai`,
     data,
-
-    options: {
-      headers: {
-        "Content-Type": "application/json",
-      },
-    },
   });
 
-  const parsedBuffer = (buffer: String) => {
-    try {
-      const json = buffer.replace("data: ", "");
-      return JSON.parse(json).choices[0].text;
-    } catch (e) {
-      return "";
+  useEffect(() => {
+    if (done) {
+      // mutate result
+      const variables: UpdateMessageMutationVariables = {
+        id: id,
+        data: {
+          content: buffer.join(""),
+          status: MessageStatus.Ready,
+        },
+      };
+      updateMessage({
+        variables,
+      });
     }
-  };
+  }, [done]);
+
+  useEffect(() => {
+    if (buffer.length > 0 && conversation?.isWaitingForModelResponse) {
+      historyStore.finishActiveConversationWaiting();
+    }
+  }, [buffer]);
 
   return data ? (
     <div
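The hunk above rewires the client side: useTextBuffer now POSTs `data` (up to the last ten messages, excluding the one being generated) to the internal api/openai route instead of hitting the completions endpoint directly, and the new `done` flag drives a GraphQL mutation that persists the finished text. A simplified sketch of that contract, with types narrowed for illustration:

    // buffer collects the streamed fragments; done flips true when the stream closes.
    const { buffer, done } = useTextBuffer({ url: "api/openai", data });
    // Once done, the full reply is buffer.join(""), written back through the
    // UpdateMessage mutation with status MessageStatus.Ready.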
@@ -78,9 +102,7 @@ const StreamTextMessage: React.FC<Props> = ({
-            <StreamingText
-              buffer={buffer.map((b) =>
-                parsedBuffer(b))}
-            >
+            <StreamingText buffer={buffer}>
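Because decoding now happens behind the API route, the render no longer maps chunks through parsedBuffer; StreamingText receives the raw buffer. An illustrative before/after of a single chunk (assumed shapes, for orientation only):

    // Before: the client decoded raw SSE lines itself, e.g.
    //   data: {"choices":[{"text":"Hello"}]}
    // After: openai-streams decodes the upstream SSE on the server, so the
    // client receives plain text tokens, e.g. "Hello".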
diff --git a/web-client/app/api/openai/route.ts b/web-client/app/api/openai/route.ts
new file mode 100644
index 000000000..4ed892ccb
--- /dev/null
+++ b/web-client/app/api/openai/route.ts
@@ -0,0 +1,26 @@
+import { OpenAI } from "openai-streams";
+
+export async function POST(req: Request) {
+  const { messages } = await req.json();
+  if (!messages) {
+    return new Response(null, {
+      status: 400,
+      statusText: "Did not include `messages` parameter",
+    });
+  }
+  const completionsStream = await OpenAI(
+    "chat",
+    {
+      model: "gpt-3.5-turbo",
+      stream: true,
+      messages,
+      max_tokens: 500,
+    },
+    {
+      apiBase: process.env.OPENAPI_ENDPOINT,
+      apiKey: process.env.OPENAPI_KEY,
+    }
+  );
+
+  return new Response(completionsStream);
+}
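The handler above is the whole server side: OpenAI("chat", ...) from openai-streams consumes the upstream SSE stream, and the decoded tokens come back as a ReadableStream that Response forwards to the browser unchanged. One way to smoke-test the route once the dev server is running, a sketch that assumes localhost:3000 and an async context (Node 18+ fetch):

    const res = await fetch("http://localhost:3000/api/openai", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ messages: [{ role: "user", content: "Hello!" }] }),
    });
    // Read the plain-text token stream as it arrives.
    const reader = res.body!.getReader();
    const decoder = new TextDecoder();
    for (;;) {
      const { value, done } = await reader.read();
      if (done) break;
      process.stdout.write(decoder.decode(value));
    }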