re-#143: use OpenAI decoder and mutate final result from client (#164)

* chore: use OpenAI parser

* chore: access host's services

* chore: take out llm service; use a GGUF model for the latest llama.cpp support
Louis 2023-09-12 16:29:26 +07:00 committed by GitHub
parent 83d2e34bd7
commit 6aae985a55
6 changed files with 87 additions and 49 deletions


@@ -5,7 +5,8 @@ NEXT_PUBLIC_DOWNLOAD_APP_IOS=#
NEXT_PUBLIC_DOWNLOAD_APP_ANDROID=#
NEXT_PUBLIC_GRAPHQL_ENGINE_URL=http://localhost:8080/v1/graphql
NEXT_PUBLIC_GRAPHQL_ENGINE_WEB_SOCKET_URL=ws://localhost:8080/v1/graphql
-NEXT_PUBLIC_OPENAPI_ENDPOINT=http://localhost:8000/v1/completions
+OPENAPI_ENDPOINT=http://host.docker.internal:8000/v1
+OPENAPI_KEY=openapikey
KEYCLOAK_CLIENT_ID=hasura
KEYCLOAK_CLIENT_SECRET=oMtCPAV7diKpE564SBspgKj4HqlKM4Hy
AUTH_ISSUER=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID
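
Note: the endpoint moves from a browser-visible NEXT_PUBLIC_ variable to the server-only pair OPENAPI_ENDPOINT/OPENAPI_KEY, and host.docker.internal lets the containerized web app reach a llama.cpp server running on the Docker host. A minimal connectivity check (illustrative only, not part of this commit), assuming the server exposes the standard OpenAI-compatible GET /models route as llama-cpp-python does:

// check-endpoint.ts (hypothetical helper, Node 18+)
async function checkEndpoint(): Promise<void> {
  const base = process.env.OPENAPI_ENDPOINT; // e.g. http://host.docker.internal:8000/v1
  const res = await fetch(`${base}/models`, {
    headers: { Authorization: `Bearer ${process.env.OPENAPI_KEY}` },
  });
  if (!res.ok) {
    throw new Error(`LLM endpoint unreachable: ${res.status} ${res.statusText}`);
  }
  console.log("available models:", await res.json());
}

checkEndpoint();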


@@ -112,20 +112,6 @@ services:
       jan_community:
         ipv4_address: 172.20.0.15
-  llm:
-    image: ghcr.io/abetlen/llama-cpp-python@sha256:b6d21ff8c4d9baad65e1fa741a0f8c898d68735fff3f3cd777e3f0c6a1839dd4
-    volumes:
-      - ./jan-inference/llm/models:/models
-    ports:
-      - 8000:8000
-    environment:
-      MODEL: /models/${LLM_MODEL_FILE}
-      PYTHONUNBUFFERED: 1
-    restart: on-failure
-    networks:
-      jan_community:
-        ipv4_address: 172.20.0.18
 networks:
   jan_community:
     driver: bridge

run.sh

@@ -124,10 +124,10 @@ progress 'cp -f sample.env .env' "Prepare .env file" $((step++))
###
### Download Model
-if [ -f "jan-inference/llm/models/llama-2-7b-chat.ggmlv3.q4_1.bin" ]; then
+if [ -f "jan-inference/llm/models/llama-2-7b.Q4_K_S.gguf" ]; then
   progress '' "Llama model - Installed" $((step++))
 else
-  progress 'wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin -P jan-inference/llm/models' "Download Llama model" $((step++))
+  progress 'wget https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_K_S.gguf -P jan-inference/llm/models' "Download Llama model" $((step++))
fi
###
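
Note: GGUF supersedes GGML as llama.cpp's model container format, and the new file name encodes its Q4_K_S quantization. A quick post-download sanity check (illustrative only, not part of this commit): every GGUF file starts with the 4-byte ASCII magic "GGUF", so a truncated or HTML-error download can be caught before llama.cpp tries to load it.

// verify-gguf.ts (hypothetical helper)
import { open } from "node:fs/promises";

async function isGguf(path: string): Promise<boolean> {
  const file = await open(path, "r");
  try {
    const buf = Buffer.alloc(4);
    await file.read(buf, 0, 4, 0); // read the first four bytes
    return buf.toString("ascii") === "GGUF";
  } finally {
    await file.close();
  }
}

isGguf("jan-inference/llm/models/llama-2-7b.Q4_K_S.gguf").then(console.log);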


@@ -128,6 +128,7 @@ export const ChatBody: React.FC<Props> = observer(({ onPromptSelected }) => {
   const renderItem = (
     index: number,
     {
+      id,
       messageType,
       senderAvatarUrl,
       senderName,
@@ -172,9 +173,11 @@ const renderItem = (
       ) : (
         <StreamTextMessage
           key={index}
+          id={id}
           avatarUrl={senderAvatarUrl ?? "/icons/app_icon.svg"}
           senderName={senderName}
           createdAt={createdAt}
           text={text}
         />
       );
     default:


@@ -1,9 +1,16 @@
-import React from "react";
+import React, { useEffect } from "react";
 import { displayDate } from "@/_utils/datetime";
 import { useStore } from "@/_models/RootStore";
-import { StreamingText, StreamingTextURL, useTextBuffer } from "nextjs-openai";
-import { MessageSenderType } from "@/_models/ChatMessage";
+import { StreamingText, useTextBuffer } from "nextjs-openai";
+import { MessageSenderType, MessageStatus } from "@/_models/ChatMessage";
 import { Role } from "@/_models/History";
+import { useMutation } from "@apollo/client";
+import { OpenAI } from "openai-streams";
+import {
+  UpdateMessageDocument,
+  UpdateMessageMutation,
+  UpdateMessageMutationVariables,
+} from "@/graphql";

 type Props = {
+  id?: string;
@@ -14,6 +21,7 @@ type Props = {
 };

 const StreamTextMessage: React.FC<Props> = ({
+  id,
   senderName,
   createdAt,
   avatarUrl = "",
@@ -21,9 +29,22 @@ const StreamTextMessage: React.FC<Props> = ({
   const [data, setData] = React.useState<any | undefined>();
   const { historyStore } = useStore();
   const conversation = historyStore?.getActiveConversation();
+  const [updateMessage] = useMutation<UpdateMessageMutation>(
+    UpdateMessageDocument
+  );

   React.useEffect(() => {
-    const messages = conversation?.chatMessages.slice(-5).map((e) => ({
+    if (
+      !conversation ||
+      conversation.chatMessages.findIndex((e) => e.id === id) !==
+        conversation.chatMessages.length - 1
+    ) {
+      return;
+    }
+    const messages = conversation?.chatMessages
+      .slice(-10)
+      .filter((e) => e.id !== id)
+      .map((e) => ({
         role:
           e.messageSenderType === MessageSenderType.User
             ? Role.User
@@ -32,32 +53,35 @@ const StreamTextMessage: React.FC<Props> = ({
       }));
     setData({
       messages,
       stream: true,
       model: "gpt-3.5-turbo",
       max_tokens: 500,
     });
   }, [conversation]);

-  const { buffer, refresh, cancel } = useTextBuffer({
-    url: `${process.env.NEXT_PUBLIC_OPENAPI_ENDPOINT}`,
-    throttle: 100,
+  const { buffer, done } = useTextBuffer({
+    url: `api/openai`,
     data,
     options: {
       headers: {
         "Content-Type": "application/json",
       },
     },
   });

-  const parsedBuffer = (buffer: String) => {
-    try {
-      const json = buffer.replace("data: ", "");
-      return JSON.parse(json).choices[0].text;
-    } catch (e) {
-      return "";
-    }
-  };
+  useEffect(() => {
+    if (done) {
+      // mutate result
+      const variables: UpdateMessageMutationVariables = {
+        id: id,
+        data: {
+          content: buffer.join(""),
+          status: MessageStatus.Ready,
+        },
+      };
+      updateMessage({
+        variables,
+      });
+    }
+  }, [done]);

   useEffect(() => {
     if (buffer.length > 0 && conversation?.isWaitingForModelResponse) {
       historyStore.finishActiveConversationWaiting();
     }
   }, [buffer]);

   return data ? (
     <div className="flex items-start gap-2">
@@ -78,9 +102,7 @@ const StreamTextMessage: React.FC<Props> = ({
         </div>
       </div>
       <div className="leading-[20px] whitespace-break-spaces text-[14px] font-normal dark:text-[#d1d5db]">
-        <StreamingText
-          buffer={buffer.map((b) => parsedBuffer(b))}
-        ></StreamingText>
+        <StreamingText buffer={buffer} fade={100} />
       </div>
     </div>
   </div>

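Note: once useTextBuffer reports done, the component joins the buffered chunks and persists the final text through the updateMessage mutation, so the completed message survives a reload instead of living only in client state. The generated UpdateMessageDocument is outside this diff; assuming Jan's Hasura backend (the GraphQL engine configured in .env above), the source mutation plausibly looks like the sketch below, where the table and input-type names are assumptions:

// update-message.ts (hypothetical; the real document lives in "@/graphql")
import { gql } from "@apollo/client";

export const UpdateMessage = gql`
  mutation UpdateMessage($id: uuid!, $data: messages_set_input!) {
    update_messages_by_pk(pk_columns: { id: $id }, _set: $data) {
      id
      content
      status
    }
  }
`;
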

@@ -0,0 +1,26 @@
+import { OpenAI } from "openai-streams";
+
+export async function POST(req: Request) {
+  const { messages } = await req.json();
+  if (!messages) {
+    return new Response(null, {
+      status: 400,
+      statusText: "Did not include `messages` parameter",
+    });
+  }
+
+  const completionsStream = await OpenAI(
+    "chat",
+    {
+      model: "gpt-3.5-turbo",
+      stream: true,
+      messages,
+      max_tokens: 500,
+    },
+    {
+      apiBase: process.env.OPENAPI_ENDPOINT,
+      apiKey: process.env.OPENAPI_KEY,
+    }
+  );
+
+  return new Response(completionsStream);
+}
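
Note: this new route keeps OPENAPI_KEY and OPENAPI_ENDPOINT server-side, and the OpenAI decoder from openai-streams parses the upstream SSE events into plain token text, which is why the client no longer needs its parsedBuffer workaround. useTextBuffer consumes the stream above, but any fetch-based reader works; an illustrative client (not part of this commit):

// stream-client.ts (hypothetical; roughly what useTextBuffer does internally)
async function streamCompletion(messages: { role: string; content: string }[]) {
  const res = await fetch("/api/openai", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ messages }),
  });
  if (!res.body) throw new Error("no response stream");

  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let text = "";
  for (;;) {
    const { value, done } = await reader.read();
    if (done) break;
    text += decoder.decode(value, { stream: true }); // append the decoded token chunk
  }
  return text;
}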