re-#143: use OpenAI decoder and mutate final result from client (#164)
* chore: use OpenAI parser
* chore: access host's services
* chore: take out llm service - GGUF model for the latest llama.cpp support
parent 83d2e34bd7
commit 6aae985a55
@@ -5,7 +5,8 @@ NEXT_PUBLIC_DOWNLOAD_APP_IOS=#
 NEXT_PUBLIC_DOWNLOAD_APP_ANDROID=#
 NEXT_PUBLIC_GRAPHQL_ENGINE_URL=http://localhost:8080/v1/graphql
 NEXT_PUBLIC_GRAPHQL_ENGINE_WEB_SOCKET_URL=ws://localhost:8080/v1/graphql
-NEXT_PUBLIC_OPENAPI_ENDPOINT=http://localhost:8000/v1/completions
+OPENAPI_ENDPOINT=http://host.docker.internal:8000/v1
+OPENAPI_KEY=openapikey
 KEYCLOAK_CLIENT_ID=hasura
 KEYCLOAK_CLIENT_SECRET=oMtCPAV7diKpE564SBspgKj4HqlKM4Hy
 AUTH_ISSUER=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID
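The endpoint moves from a browser-visible variable to server-only configuration. In Next.js, only variables prefixed with `NEXT_PUBLIC_` are inlined into the client bundle; the new `OPENAPI_ENDPOINT` and `OPENAPI_KEY` are readable only in server code such as the route handler added later in this commit. A minimal sketch of reading them (the helper name is hypothetical, not part of the commit):

```ts
// Hypothetical helper: reads the server-only upstream settings introduced
// in this .env change. Works only in server code (e.g. route handlers);
// the browser bundle never sees these values.
export function getUpstreamConfig() {
  const apiBase = process.env.OPENAPI_ENDPOINT; // http://host.docker.internal:8000/v1
  const apiKey = process.env.OPENAPI_KEY;
  if (!apiBase || !apiKey) {
    throw new Error("OPENAPI_ENDPOINT and OPENAPI_KEY must be set");
  }
  return { apiBase, apiKey };
}
```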
@@ -112,20 +112,6 @@ services:
       jan_community:
         ipv4_address: 172.20.0.15
 
-  llm:
-    image: ghcr.io/abetlen/llama-cpp-python@sha256:b6d21ff8c4d9baad65e1fa741a0f8c898d68735fff3f3cd777e3f0c6a1839dd4
-    volumes:
-      - ./jan-inference/llm/models:/models
-    ports:
-      - 8000:8000
-    environment:
-      MODEL: /models/${LLM_MODEL_FILE}
-      PYTHONUNBUFFERED: 1
-    restart: on-failure
-    networks:
-      jan_community:
-        ipv4_address: 172.20.0.18
-
 networks:
   jan_community:
     driver: bridge
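With the `llm` container removed, the inference server is expected to run on the Docker host itself, which is why `OPENAPI_ENDPOINT` now points at `host.docker.internal` instead of a compose-network address. A startup probe could verify that (hypothetical helper; assumes the server exposes the OpenAI-compatible `GET /v1/models` endpoint, as llama-cpp-python does):

```ts
// Hypothetical probe: confirm the host-side llama.cpp server is reachable
// from inside the compose network before accepting chat traffic.
export async function llmIsReachable(): Promise<boolean> {
  const base =
    process.env.OPENAPI_ENDPOINT ?? "http://host.docker.internal:8000/v1";
  try {
    // /models is part of the OpenAI-compatible surface served under /v1.
    const res = await fetch(`${base}/models`, {
      signal: AbortSignal.timeout(2_000),
    });
    return res.ok;
  } catch {
    return false; // connection refused or timed out
  }
}
```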
run.sh (4 changed lines)

@@ -124,10 +124,10 @@ progress 'cp -f sample.env .env' "Prepare .env file" $((step++))
 ###
 
 ### Download Model
-if [ -f "jan-inference/llm/models/llama-2-7b-chat.ggmlv3.q4_1.bin" ]; then
+if [ -f "jan-inference/llm/models/llama-2-7b.Q4_K_S.gguf" ]; then
   progress '' "Llama model - Installed" $((step++))
 else
-  progress 'wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin -P jan-inference/llm/models' "Download Llama model" $((step++))
+  progress 'wget https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_K_S.gguf -P jan-inference/llm/models' "Download Llama model" $((step++))
 fi
 ###
 
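This is the GGML-to-GGUF migration named in the commit message: recent llama.cpp builds dropped the old GGML format, so the script now downloads `llama-2-7b.Q4_K_S.gguf` instead of the `.ggmlv3` file. GGUF files begin with the 4-byte ASCII magic `GGUF`, which allows a cheap post-download sanity check (hypothetical helper, not part of the commit):

```ts
// Hypothetical check: verify the downloaded file really is GGUF before
// pointing the server at it. GGUF starts with the ASCII magic "GGUF";
// older GGML files use a different magic and no longer load.
import { open } from "node:fs/promises";

export async function isGguf(path: string): Promise<boolean> {
  const fh = await open(path, "r");
  try {
    const magic = Buffer.alloc(4);
    await fh.read(magic, 0, 4, 0); // read the first four bytes
    return magic.toString("ascii") === "GGUF";
  } finally {
    await fh.close();
  }
}
```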
@@ -128,6 +128,7 @@ export const ChatBody: React.FC<Props> = observer(({ onPromptSelected }) => {
 const renderItem = (
   index: number,
   {
+    id,
     messageType,
     senderAvatarUrl,
     senderName,
@@ -172,9 +173,11 @@ const renderItem = (
       ) : (
         <StreamTextMessage
           key={index}
+          id={id}
           avatarUrl={senderAvatarUrl ?? "/icons/app_icon.svg"}
           senderName={senderName}
           createdAt={createdAt}
+          text={text}
         />
       );
     default:
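ChatBody only threads two existing message fields down to the streaming component: `id`, so StreamTextMessage can update the right row once the stream finishes, and `text`, so already-persisted content can render without re-streaming. The resulting prop shape, as far as these hunks show it (field types are assumptions):

```ts
// Assumed shape of the props StreamTextMessage now receives; only `id`
// and `text` are newly passed in this commit, the rest already were.
type StreamTextMessageProps = {
  id?: string; // row targeted by the UpdateMessage mutation on completion
  senderName: string;
  createdAt: number; // rendered through displayDate
  avatarUrl?: string;
  text?: string; // previously persisted content, if any
};
```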
@@ -1,9 +1,16 @@
-import React from "react";
+import React, { useEffect } from "react";
 import { displayDate } from "@/_utils/datetime";
 import { useStore } from "@/_models/RootStore";
-import { StreamingText, StreamingTextURL, useTextBuffer } from "nextjs-openai";
-import { MessageSenderType } from "@/_models/ChatMessage";
+import { StreamingText, useTextBuffer } from "nextjs-openai";
+import { MessageSenderType, MessageStatus } from "@/_models/ChatMessage";
 import { Role } from "@/_models/History";
+import { useMutation } from "@apollo/client";
+import { OpenAI } from "openai-streams";
+import {
+  UpdateMessageDocument,
+  UpdateMessageMutation,
+  UpdateMessageMutationVariables,
+} from "@/graphql";
 
 type Props = {
   id?: string;
@@ -14,6 +21,7 @@ type Props = {
 };
 
 const StreamTextMessage: React.FC<Props> = ({
+  id,
   senderName,
   createdAt,
   avatarUrl = "",
@@ -21,43 +29,59 @@ const StreamTextMessage: React.FC<Props> = ({
   const [data, setData] = React.useState<any | undefined>();
   const { historyStore } = useStore();
   const conversation = historyStore?.getActiveConversation();
+  const [updateMessage] = useMutation<UpdateMessageMutation>(
+    UpdateMessageDocument
+  );
+
   React.useEffect(() => {
-    const messages = conversation?.chatMessages.slice(-5).map((e) => ({
-      role:
-        e.messageSenderType === MessageSenderType.User
-          ? Role.User
-          : Role.Assistant,
-      content: e.text,
-    }));
+    if (
+      !conversation ||
+      conversation.chatMessages.findIndex((e) => e.id === id) !==
+        conversation.chatMessages.length - 1
+    ) {
+      return;
+    }
+    const messages = conversation?.chatMessages
+      .slice(-10)
+      .filter((e) => e.id !== id)
+      .map((e) => ({
+        role:
+          e.messageSenderType === MessageSenderType.User
+            ? Role.User
+            : Role.Assistant,
+        content: e.text,
+      }));
     setData({
       messages,
-      stream: true,
-      model: "gpt-3.5-turbo",
-      max_tokens: 500,
     });
   }, [conversation]);
 
-  const { buffer, refresh, cancel } = useTextBuffer({
-    url: `${process.env.NEXT_PUBLIC_OPENAPI_ENDPOINT}`,
-    throttle: 100,
+  const { buffer, done } = useTextBuffer({
+    url: `api/openai`,
     data,
-
-    options: {
-      headers: {
-        "Content-Type": "application/json",
-      },
-    },
   });
 
-  const parsedBuffer = (buffer: String) => {
-    try {
-      const json = buffer.replace("data: ", "");
-      return JSON.parse(json).choices[0].text;
-    } catch (e) {
-      return "";
-    }
-  };
+  useEffect(() => {
+    if (done) {
+      // mutate result
+      const variables: UpdateMessageMutationVariables = {
+        id: id,
+        data: {
+          content: buffer.join(""),
+          status: MessageStatus.Ready,
+        },
+      };
+      updateMessage({
+        variables,
+      });
+    }
+  }, [done]);
+
+  useEffect(() => {
+    if (buffer.length > 0 && conversation?.isWaitingForModelResponse) {
+      historyStore.finishActiveConversationWaiting();
+    }
+  }, [buffer]);
 
   return data ? (
     <div className="flex items-start gap-2">
@@ -78,9 +102,7 @@ const StreamTextMessage: React.FC<Props> = ({
         </div>
       </div>
       <div className="leading-[20px] whitespace-break-spaces text-[14px] font-normal dark:text-[#d1d5db]">
-        <StreamingText
-          buffer={buffer.map((b) => parsedBuffer(b))}
-        ></StreamingText>
+        <StreamingText buffer={buffer} fade={100} />
       </div>
     </div>
   </div>
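The component now works in three stages: build the request payload only when it renders the newest message in the conversation, stream chunks from the local `api/openai` route into `buffer` (the hand-rolled `parsedBuffer` SSE parsing is gone, replaced by the library's own decoding), and persist the joined buffer through the `UpdateMessage` mutation once `done` flips. The `findIndex` guard is the subtle part; a distilled version (function and type names are mine, not from the commit):

```ts
// Minimal message shape for the sketch; the real ChatMessage model has
// more fields.
type Msg = { id: string };

// Distilled form of the guard added above: only the component rendering
// the last message in the conversation may build request data, so older,
// already-answered messages never re-stream on re-render.
function shouldStream(messages: Msg[], id: string | undefined): boolean {
  return (
    messages.length > 0 &&
    messages.findIndex((m) => m.id === id) === messages.length - 1
  );
}
```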
web-client/app/api/openai/route.ts (new file, 26 lines)

@@ -0,0 +1,26 @@
+import { OpenAI } from "openai-streams";
+
+export async function POST(req: Request) {
+  const { messages } = await req.json();
+  if (!messages) {
+    return new Response(null, {
+      status: 400,
+      statusText: "Did not include `messages` parameter",
+    });
+  }
+  const completionsStream = await OpenAI(
+    "chat",
+    {
+      model: "gpt-3.5-turbo",
+      stream: true,
+      messages,
+      max_tokens: 500,
+    },
+    {
+      apiBase: process.env.OPENAPI_ENDPOINT,
+      apiKey: process.env.OPENAPI_KEY,
+    }
+  );
+
+  return new Response(completionsStream);
+}
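The new route handler keeps `OPENAPI_KEY` on the server and simply pipes the `ReadableStream` returned by `openai-streams` back to the browser. Consuming it outside `useTextBuffer` might look like this (a sketch; the reader loop is standard Fetch-stream handling, and the function name is mine):

```ts
// Sketch of calling the new route directly; the app itself goes through
// useTextBuffer, which wraps the same fetch-and-read loop.
async function streamCompletion(prompt: string): Promise<string> {
  const res = await fetch("/api/openai", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ messages: [{ role: "user", content: prompt }] }),
  });
  if (!res.ok || !res.body) throw new Error(res.statusText);

  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let text = "";
  for (;;) {
    const { value, done } = await reader.read();
    if (done) break;
    text += decoder.decode(value, { stream: true }); // append each chunk
  }
  return text;
}
```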