re-#143: use OpenAI decoder and mutate final result from client (#164)
* chore: use OpenAI parser
* chore: access host's services
* chore: take out llm service - GGUF model for the latest llama.cpp support
This commit is contained in:
parent 83d2e34bd7
commit 6aae985a55
sample.env
@@ -5,7 +5,8 @@ NEXT_PUBLIC_DOWNLOAD_APP_IOS=#
 NEXT_PUBLIC_DOWNLOAD_APP_ANDROID=#
 NEXT_PUBLIC_GRAPHQL_ENGINE_URL=http://localhost:8080/v1/graphql
 NEXT_PUBLIC_GRAPHQL_ENGINE_WEB_SOCKET_URL=ws://localhost:8080/v1/graphql
-NEXT_PUBLIC_OPENAPI_ENDPOINT=http://localhost:8000/v1/completions
+OPENAPI_ENDPOINT=http://host.docker.internal:8000/v1
+OPENAPI_KEY=openapikey
 KEYCLOAK_CLIENT_ID=hasura
 KEYCLOAK_CLIENT_SECRET=oMtCPAV7diKpE564SBspgKj4HqlKM4Hy
 AUTH_ISSUER=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID
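Note the split: NEXT_PUBLIC_-prefixed variables are inlined into the browser bundle by Next.js, while the two new OPENAPI_* variables stay server-side and are read only by the new API route added below. A minimal sketch of that server-side lookup, in TypeScript (the fallback values here are illustrative assumptions, not part of the commit):

    // sketch: server-side resolution of the proxy target; fallbacks are hypothetical
    const apiBase = process.env.OPENAPI_ENDPOINT ?? "http://host.docker.internal:8000/v1";
    const apiKey = process.env.OPENAPI_KEY ?? "openapikey";
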
docker-compose.yml
@@ -112,20 +112,6 @@ services:
       jan_community:
         ipv4_address: 172.20.0.15

-  llm:
-    image: ghcr.io/abetlen/llama-cpp-python@sha256:b6d21ff8c4d9baad65e1fa741a0f8c898d68735fff3f3cd777e3f0c6a1839dd4
-    volumes:
-      - ./jan-inference/llm/models:/models
-    ports:
-      - 8000:8000
-    environment:
-      MODEL: /models/${LLM_MODEL_FILE}
-      PYTHONUNBUFFERED: 1
-    restart: on-failure
-    networks:
-      jan_community:
-        ipv4_address: 172.20.0.18
-
 networks:
   jan_community:
     driver: bridge
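With the llm service deleted, nothing inside the compose network serves the model anymore; containers are expected to reach a llama.cpp server running on the host through host.docker.internal (the new OPENAPI_ENDPOINT above). A hedged sketch of a startup probe against that server — GET /v1/models is a standard OpenAI-compatible endpoint that llama-cpp-python's server exposes, but the helper itself is illustrative and not part of this commit:

    // illustrative probe; assumes an OpenAI-compatible server at OPENAPI_ENDPOINT
    async function llmServerIsUp(): Promise<boolean> {
      try {
        const res = await fetch(`${process.env.OPENAPI_ENDPOINT}/models`, {
          headers: { Authorization: `Bearer ${process.env.OPENAPI_KEY}` },
        });
        return res.ok; // 200 means the host-side server is reachable
      } catch {
        return false;
      }
    }
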
run.sh (4 changed lines)
@@ -124,10 +124,10 @@ progress 'cp -f sample.env .env' "Prepare .env file" $((step++))
 ###

 ### Download Model
-if [ -f "jan-inference/llm/models/llama-2-7b-chat.ggmlv3.q4_1.bin" ]; then
+if [ -f "jan-inference/llm/models/llama-2-7b.Q4_K_S.gguf" ]; then
   progress '' "Llama model - Installed" $((step++))
 else
-  progress 'wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin -P jan-inference/llm/models' "Download Llama model" $((step++))
+  progress 'wget https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_K_S.gguf -P jan-inference/llm/models' "Download Llama model" $((step++))
 fi
 ###
ChatBody component
@@ -128,6 +128,7 @@ export const ChatBody: React.FC<Props> = observer(({ onPromptSelected }) => {
   const renderItem = (
     index: number,
     {
+      id,
       messageType,
       senderAvatarUrl,
       senderName,
@@ -172,9 +173,11 @@ const renderItem = (
       ) : (
         <StreamTextMessage
+          key={index}
+          id={id}
          avatarUrl={senderAvatarUrl ?? "/icons/app_icon.svg"}
          senderName={senderName}
          createdAt={createdAt}
          text={text}
        />
      );
    default:
StreamTextMessage component
@@ -1,9 +1,16 @@
-import React from "react";
+import React, { useEffect } from "react";
 import { displayDate } from "@/_utils/datetime";
 import { useStore } from "@/_models/RootStore";
-import { StreamingText, StreamingTextURL, useTextBuffer } from "nextjs-openai";
-import { MessageSenderType } from "@/_models/ChatMessage";
+import { StreamingText, useTextBuffer } from "nextjs-openai";
+import { MessageSenderType, MessageStatus } from "@/_models/ChatMessage";
 import { Role } from "@/_models/History";
+import { useMutation } from "@apollo/client";
+import { OpenAI } from "openai-streams";
+import {
+  UpdateMessageDocument,
+  UpdateMessageMutation,
+  UpdateMessageMutationVariables,
+} from "@/graphql";

 type Props = {
   id?: string;
@@ -14,6 +21,7 @@ type Props = {
 };

 const StreamTextMessage: React.FC<Props> = ({
+  id,
   senderName,
   createdAt,
   avatarUrl = "",
@@ -21,43 +29,59 @@ const StreamTextMessage: React.FC<Props> = ({
   const [data, setData] = React.useState<any | undefined>();
   const { historyStore } = useStore();
   const conversation = historyStore?.getActiveConversation();
+  const [updateMessage] = useMutation<UpdateMessageMutation>(
+    UpdateMessageDocument
+  );

   React.useEffect(() => {
-    const messages = conversation?.chatMessages.slice(-5).map((e) => ({
-      role:
-        e.messageSenderType === MessageSenderType.User
-          ? Role.User
-          : Role.Assistant,
-      content: e.text,
-    }));
+    if (
+      !conversation ||
+      conversation.chatMessages.findIndex((e) => e.id === id) !==
+        conversation.chatMessages.length - 1
+    ) {
+      return;
+    }
+    const messages = conversation?.chatMessages
+      .slice(-10)
+      .filter((e) => e.id !== id)
+      .map((e) => ({
+        role:
+          e.messageSenderType === MessageSenderType.User
+            ? Role.User
+            : Role.Assistant,
+        content: e.text,
+      }));
     setData({
       messages,
       stream: true,
       model: "gpt-3.5-turbo",
       max_tokens: 500,
     });
   }, [conversation]);

-  const { buffer, refresh, cancel } = useTextBuffer({
-    url: `${process.env.NEXT_PUBLIC_OPENAPI_ENDPOINT}`,
-    throttle: 100,
+  const { buffer, done } = useTextBuffer({
+    url: `api/openai`,
     data,

     options: {
       headers: {
         "Content-Type": "application/json",
       },
     },
   });

-  const parsedBuffer = (buffer: String) => {
-    try {
-      const json = buffer.replace("data: ", "");
-      return JSON.parse(json).choices[0].text;
-    } catch (e) {
-      return "";
+  useEffect(() => {
+    if (done) {
+      // mutate result
+      const variables: UpdateMessageMutationVariables = {
+        id: id,
+        data: {
+          content: buffer.join(""),
+          status: MessageStatus.Ready,
+        },
+      };
+      updateMessage({
+        variables,
+      });
     }
-  };
+  }, [done]);
+
+  useEffect(() => {
+    if (buffer.length > 0 && conversation?.isWaitingForModelResponse) {
+      historyStore.finishActiveConversationWaiting();
+    }
+  }, [buffer]);

   return data ? (
     <div className="flex items-start gap-2">
@@ -78,9 +102,7 @@ const StreamTextMessage: React.FC<Props> = ({
           </div>
         </div>
         <div className="leading-[20px] whitespace-break-spaces text-[14px] font-normal dark:text-[#d1d5db]">
-          <StreamingText
-            buffer={buffer.map((b) => parsedBuffer(b))}
-          ></StreamingText>
+          <StreamingText buffer={buffer} fade={100} />
         </div>
       </div>
     </div>
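Taken together, the component now posts an OpenAI-style chat payload to the same-origin api/openai route, lets nextjs-openai accumulate the token buffer, and, once done flips to true, persists buffer.join("") through the UpdateMessage mutation instead of re-parsing "data: " SSE frames client-side. A sketch of the request body the hook sends, mirroring the data object built in the effect above (the message content is a made-up example):

    // illustrative payload; shape taken from the setData call above
    const data = {
      messages: [{ role: "user", content: "Summarize GGUF vs GGML" }], // hypothetical history entry
      stream: true,
      model: "gpt-3.5-turbo",
      max_tokens: 500,
    };

The guard at the top of the effect ensures only the newest message streams; earlier messages render through the non-streaming branch in ChatBody.
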
web-client/app/api/openai/route.ts (new file, 26 lines)
@@ -0,0 +1,26 @@
+import { OpenAI } from "openai-streams";
+
+export async function POST(req: Request) {
+  const { messages } = await req.json();
+  if (!messages) {
+    return new Response(null, {
+      status: 400,
+      statusText: "Did not include `messages` parameter",
+    });
+  }
+  const completionsStream = await OpenAI(
+    "chat",
+    {
+      model: "gpt-3.5-turbo",
+      stream: true,
+      messages,
+      max_tokens: 500,
+    },
+    {
+      apiBase: process.env.OPENAPI_ENDPOINT,
+      apiKey: process.env.OPENAPI_KEY,
+    }
+  );
+
+  return new Response(completionsStream);
+}
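openai-streams resolves the SSE framing server-side and hands back a ReadableStream of decoded tokens, which the route forwards as the response body. A minimal sketch of consuming the route without the nextjs-openai hook (the prompt is illustrative):

    // illustrative consumer of the new route
    const res = await fetch("/api/openai", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ messages: [{ role: "user", content: "Hello" }] }),
    });
    const reader = res.body!.getReader();
    const decoder = new TextDecoder();
    let text = "";
    for (;;) {
      const { value, done } = await reader.read();
      if (done) break;
      text += decoder.decode(value); // tokens arrive already decoded, no "data: " prefixes
    }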