re-#143: use OpenAI decoder and mutate final result from client (#164)

* chore: use the OpenAI parser

* chore: access the host's services

* chore: take out the llm service; use a GGUF model for the latest llama.cpp support

In short: the web client now streams chat completions through a local Next.js api/openai route built on openai-streams, and once streaming finishes it writes the final message back over GraphQL from the client; the llama.cpp server is expected to run on the Docker host instead of in its own Compose container.
Committed by Louis on 2023-09-12 16:29:26 +07:00 via GitHub
parent 83d2e34bd7
commit 6aae985a55
6 changed files with 87 additions and 49 deletions

sample.env

@@ -5,7 +5,8 @@ NEXT_PUBLIC_DOWNLOAD_APP_IOS=#
 NEXT_PUBLIC_DOWNLOAD_APP_ANDROID=#
 NEXT_PUBLIC_GRAPHQL_ENGINE_URL=http://localhost:8080/v1/graphql
 NEXT_PUBLIC_GRAPHQL_ENGINE_WEB_SOCKET_URL=ws://localhost:8080/v1/graphql
-NEXT_PUBLIC_OPENAPI_ENDPOINT=http://localhost:8000/v1/completions
+OPENAPI_ENDPOINT=http://host.docker.internal:8000/v1
+OPENAPI_KEY=openapikey
 KEYCLOAK_CLIENT_ID=hasura
 KEYCLOAK_CLIENT_SECRET=oMtCPAV7diKpE564SBspgKj4HqlKM4Hy
 AUTH_ISSUER=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID
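Since the endpoint now targets host.docker.internal, the containers expect an OpenAI-compatible server listening on port 8000 on the Docker host rather than inside Compose. A minimal sketch of starting one with llama-cpp-python on the host; the exact invocation is an assumption, not part of this commit:

  # Sketch: serve the GGUF model from the host on port 8000 using
  # llama-cpp-python's built-in OpenAI-compatible server; the model path
  # matches run.sh's download target.
  python3 -m llama_cpp.server --model jan-inference/llm/models/llama-2-7b.Q4_K_S.gguf --port 8000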

docker-compose.yml

@@ -112,20 +112,6 @@ services:
       jan_community:
         ipv4_address: 172.20.0.15
 
-  llm:
-    image: ghcr.io/abetlen/llama-cpp-python@sha256:b6d21ff8c4d9baad65e1fa741a0f8c898d68735fff3f3cd777e3f0c6a1839dd4
-    volumes:
-      - ./jan-inference/llm/models:/models
-    ports:
-      - 8000:8000
-    environment:
-      MODEL: /models/${LLM_MODEL_FILE}
-      PYTHONUNBUFFERED: 1
-    restart: on-failure
-    networks:
-      jan_community:
-        ipv4_address: 172.20.0.18
-
 networks:
   jan_community:
     driver: bridge
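One caveat with the host.docker.internal approach: that name resolves out of the box on Docker Desktop (macOS/Windows) but not on stock Linux. A hedged sketch of what a service in this compose file could add to map it, assuming Docker Engine 20.10+; this entry is not part of the commit:

    # Sketch: make host.docker.internal resolve to the host gateway on Linux.
    extra_hosts:
      - "host.docker.internal:host-gateway"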

run.sh

@@ -124,10 +124,10 @@ progress 'cp -f sample.env .env' "Prepare .env file" $((step++))
 
 ###
 ### Download Model
-if [ -f "jan-inference/llm/models/llama-2-7b-chat.ggmlv3.q4_1.bin" ]; then
+if [ -f "jan-inference/llm/models/llama-2-7b.Q4_K_S.gguf" ]; then
   progress '' "Llama model - Installed" $((step++))
 else
-  progress 'wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin -P jan-inference/llm/models' "Download Llama model" $((step++))
+  progress 'wget https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_K_S.gguf -P jan-inference/llm/models' "Download Llama model" $((step++))
 fi
 
 ###

ChatBody component

@@ -128,6 +128,7 @@ export const ChatBody: React.FC<Props> = observer(({ onPromptSelected }) => {
 const renderItem = (
   index: number,
   {
+    id,
     messageType,
     senderAvatarUrl,
     senderName,
@@ -172,9 +173,11 @@
       ) : (
         <StreamTextMessage
           key={index}
+          id={id}
           avatarUrl={senderAvatarUrl ?? "/icons/app_icon.svg"}
           senderName={senderName}
           createdAt={createdAt}
+          text={text}
         />
       );
     default:

StreamTextMessage component

@@ -1,9 +1,16 @@
-import React from "react";
+import React, { useEffect } from "react";
 import { displayDate } from "@/_utils/datetime";
 import { useStore } from "@/_models/RootStore";
-import { StreamingText, StreamingTextURL, useTextBuffer } from "nextjs-openai";
-import { MessageSenderType } from "@/_models/ChatMessage";
+import { StreamingText, useTextBuffer } from "nextjs-openai";
+import { MessageSenderType, MessageStatus } from "@/_models/ChatMessage";
 import { Role } from "@/_models/History";
+import { useMutation } from "@apollo/client";
+import { OpenAI } from "openai-streams";
+import {
+  UpdateMessageDocument,
+  UpdateMessageMutation,
+  UpdateMessageMutationVariables,
+} from "@/graphql";
 
 type Props = {
   id?: string;
@@ -14,6 +21,7 @@ type Props = {
 };
 
 const StreamTextMessage: React.FC<Props> = ({
+  id,
   senderName,
   createdAt,
   avatarUrl = "",
@@ -21,43 +29,59 @@
   const [data, setData] = React.useState<any | undefined>();
   const { historyStore } = useStore();
   const conversation = historyStore?.getActiveConversation();
+  const [updateMessage] = useMutation<UpdateMessageMutation>(
+    UpdateMessageDocument
+  );
 
   React.useEffect(() => {
-    const messages = conversation?.chatMessages.slice(-5).map((e) => ({
-      role:
-        e.messageSenderType === MessageSenderType.User
-          ? Role.User
-          : Role.Assistant,
-      content: e.text,
-    }));
+    if (
+      !conversation ||
+      conversation.chatMessages.findIndex((e) => e.id === id) !==
+        conversation.chatMessages.length - 1
+    ) {
+      return;
+    }
+    const messages = conversation?.chatMessages
+      .slice(-10)
+      .filter((e) => e.id !== id)
+      .map((e) => ({
+        role:
+          e.messageSenderType === MessageSenderType.User
+            ? Role.User
+            : Role.Assistant,
+        content: e.text,
+      }));
     setData({
       messages,
-      stream: true,
-      model: "gpt-3.5-turbo",
-      max_tokens: 500,
     });
   }, [conversation]);
 
-  const { buffer, refresh, cancel } = useTextBuffer({
-    url: `${process.env.NEXT_PUBLIC_OPENAPI_ENDPOINT}`,
+  const { buffer, done } = useTextBuffer({
+    url: `api/openai`,
+    throttle: 100,
     data,
-    options: {
-      headers: {
-        "Content-Type": "application/json",
-      },
-    },
   });
 
-  const parsedBuffer = (buffer: String) => {
-    try {
-      const json = buffer.replace("data: ", "");
-      return JSON.parse(json).choices[0].text;
-    } catch (e) {
-      return "";
-    }
-  };
+  useEffect(() => {
+    if (done) {
+      // mutate result
+      const variables: UpdateMessageMutationVariables = {
+        id: id,
+        data: {
+          content: buffer.join(""),
+          status: MessageStatus.Ready,
+        },
+      };
+      updateMessage({
+        variables,
+      });
+    }
+  }, [done]);
+
+  useEffect(() => {
+    if (buffer.length > 0 && conversation?.isWaitingForModelResponse) {
+      historyStore.finishActiveConversationWaiting();
+    }
+  }, [buffer]);
 
   return data ? (
     <div className="flex items-start gap-2">
@@ -78,9 +102,7 @@ const StreamTextMessage: React.FC<Props> = ({
         </div>
       </div>
       <div className="leading-[20px] whitespace-break-spaces text-[14px] font-normal dark:text-[#d1d5db]">
-        <StreamingText
-          buffer={buffer.map((b) => parsedBuffer(b))}
-        ></StreamingText>
+        <StreamingText buffer={buffer} fade={100} />
       </div>
     </div>
   </div>
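The final write-back relies on UpdateMessageDocument, which is generated from the project's GraphQL schema rather than shown in this diff. For orientation only, a plausible shape of that mutation against a Hasura-style messages table; the table and field names below are guesses, not taken from this commit:

  mutation UpdateMessage($id: uuid!, $data: messages_set_input!) {
    # Hypothetical: persist the fully streamed content and mark the message ready.
    update_messages_by_pk(pk_columns: { id: $id }, _set: $data) {
      id
    }
  }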

api/openai route handler (new file)

@@ -0,0 +1,26 @@
+import { OpenAI } from "openai-streams";
+
+export async function POST(req: Request) {
+  const { messages } = await req.json();
+  if (!messages) {
+    return new Response(null, {
+      status: 400,
+      statusText: "Did not include `messages` parameter",
+    });
+  }
+
+  const completionsStream = await OpenAI(
+    "chat",
+    {
+      model: "gpt-3.5-turbo",
+      stream: true,
+      messages,
+      max_tokens: 500,
+    },
+    {
+      apiBase: process.env.OPENAPI_ENDPOINT,
+      apiKey: process.env.OPENAPI_KEY,
+    }
+  );
+
+  return new Response(completionsStream);
+}
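A quick way to smoke-test the new route from outside the React component — a sketch that assumes the Next.js app runs on localhost:3000 and a model server answers at OPENAPI_ENDPOINT; none of this is part of the commit:

  // Hypothetical smoke test (Node 18+, ESM): POST a chat history and print
  // the streamed tokens as they arrive.
  const res = await fetch("http://localhost:3000/api/openai", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ messages: [{ role: "user", content: "Hello!" }] }),
  });

  // openai-streams decodes the SSE frames server-side, so the response body
  // is plain text chunks rather than "data: {...}" lines.
  const reader = res.body!.getReader();
  const decoder = new TextDecoder();
  for (;;) {
    const { value, done } = await reader.read();
    if (done) break;
    process.stdout.write(decoder.decode(value));
  }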