Merge pull request #159 from janhq/fix/#143-direct-communication-between-client-llm

fix: #143 - Direct communication between client and llm inference service
This commit is contained in:
Daniel 2023-09-12 15:09:59 +08:00 committed by GitHub
commit 83d2e34bd7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 57 additions and 318 deletions

View File

@ -84,14 +84,3 @@ delete_permissions:
user_id: user_id:
_eq: X-Hasura-User-Id _eq: X-Hasura-User-Id
comment: "" comment: ""
# Hasura event trigger (removed by this commit): fired a webhook on every
# message insert so a worker could stream an LLM completion into the row.
event_triggers:
  - name: new_llm_message
    definition:
      enable_manual: false   # only fires on real inserts, not manual invocation
      insert:
        columns: '*'         # any insert triggers the event, regardless of columns set
    retry_conf:
      interval_sec: 10
      num_retries: 0         # best-effort delivery — failed webhook calls are not retried
      timeout_sec: 60
    # Resolved from the Hasura env at runtime; points at the worker's HTTP endpoint.
    webhook: '{{HASURA_EVENTS_HOOK_URL}}'

View File

@ -1,16 +0,0 @@
# Dockerfile
# Dev-only image for the Cloudflare worker: runs `wrangler dev` directly.
# alpine does not work with wrangler
FROM node
RUN mkdir -p /worker
WORKDIR /worker
# Install the CLI globally; project sources are bind-mounted over /worker in compose.
RUN npm install -g wrangler
COPY . /worker
# wrangler dev's default local port.
EXPOSE 8787
CMD ["wrangler", "dev"]

View File

@ -1,196 +0,0 @@
/** Worker bindings/environment variables (populated from wrangler.toml [vars]). */
export interface Env {
  /** Admin secret sent as `x-hasura-admin-secret` on GraphQL mutations. */
  HASURA_ADMIN_API_KEY: string;
  /** Chat-completions URL of the LLM inference service (fetched with SSE streaming). */
  LLM_INFERENCE_ENDPOINT: string;
  /** Sent as the `api-key` header to the inference service. */
  INFERENCE_API_KEY: string;
  /** Base URL of the Hasura GraphQL engine; `/v1/graphql` is appended for requests. */
  HASURA_GRAPHQL_ENGINE_ENDPOINT: string;
}
/** Cloudflare Worker entry point: delegate every request to handleRequest. */
export default {
  fetch: (request: Request, env: Env) => handleRequest(env, request),
};
/**
 * Hasura event-trigger webhook handler.
 *
 * Receives the insert event for a `messages` row, streams a chat completion
 * from the LLM service (SSE, `stream: true`), and progressively writes the
 * partial answer back into Hasura (throttled). When the stream ends, the row
 * is marked `ready`, its prompt cache cleared, and the parent conversation's
 * last message is updated.
 *
 * @param env     Worker bindings (endpoints and secrets).
 * @param request Hasura event payload; the handler reads
 *                event.data.new.{id, content, prompt_cache, status, conversation_id}.
 * @returns A JSON Response acknowledging the event (200), or 502 when the
 *          LLM service returns no body.
 */
async function handleRequest(env: Env, request: Request) {
  const apiurl = env.LLM_INFERENCE_ENDPOINT;
  const requestBody = await request.json();

  // ---- shared throttle state (captured by every wrapper created below) ----
  let lastCallTime = 0;
  // FIX: was `any`; setTimeout's return type is platform-dependent.
  let timeoutId: ReturnType<typeof setTimeout> | undefined;
  let done = true; // true while no throttled call is in flight

  // Run `fn` at most once per `delay` ms. An early call is deferred with
  // setTimeout, replacing any previously deferred call.
  function throttle(fn: () => void, delay: number) {
    return async function () {
      const now = new Date().getTime();
      const timeSinceLastCall = now - lastCallTime;
      if (timeSinceLastCall >= delay && done) {
        lastCallTime = now;
        done = false;
        await fn();
        done = true;
      } else {
        clearTimeout(timeoutId);
        timeoutId = setTimeout(async () => {
          lastCallTime = now;
          done = false;
          await fn();
          done = true;
        }, delay - timeSinceLastCall);
      }
    };
  }

  const messageBody = {
    id: requestBody.event.data.new.id,
    content: requestBody.event.data.new.content,
    messages: requestBody.event.data.new.prompt_cache,
    status: requestBody.event.data.new.status,
  };

  // Only rows inserted in "pending" state need inference; ack everything else.
  if (messageBody.status !== "pending") {
    return new Response(JSON.stringify({ status: "success" }), {
      status: 200,
      statusText: "success",
    });
  }

  const llmRequestBody = {
    messages: messageBody.messages,
    stream: true,
    model: "gpt-3.5-turbo",
    max_tokens: 500,
  };
  const init = {
    body: JSON.stringify(llmRequestBody),
    headers: {
      "Content-Type": "application/json",
      Accept: "text/event-stream",
      // NOTE(review): this is not a valid Authorization value — it looks like
      // a CORS header pasted by mistake. Kept as-is to preserve behavior;
      // real auth is carried by the `api-key` header below. TODO confirm.
      Authorization: "Access-Control-Allow-Origin: *",
      "api-key": env.INFERENCE_API_KEY,
    },
    method: "POST",
  };

  return fetch(apiurl, init)
    .then((res) => res.body?.getReader())
    .then(async (reader) => {
      if (!reader) {
        console.error("Error: fail to read data from response");
        // FIX: previously returned undefined here, which is not a valid
        // return from a Worker fetch path and makes the runtime error out.
        return new Response(
          JSON.stringify({ status: "error", message: "no response body from LLM service" }),
          { status: 502, statusText: "Bad Gateway" }
        );
      }
      let answer = "";
      let cachedChunk = "";
      // FIX: hoisted out of the loop — was re-created for every chunk.
      const textDecoder = new TextDecoder("utf-8");
      while (true) {
        // FIX: renamed from `done` — the original destructure shadowed the
        // throttle's `done` flag, which is confusing even though harmless.
        const { done: streamDone, value } = await reader.read();
        if (streamDone) {
          break;
        }
        const chunk = textDecoder.decode(value);
        cachedChunk += chunk;
        // Buffer until at least one complete `data: {...}` SSE record arrived.
        const matched = cachedChunk.match(/data: {(.*)}/g);
        if (!matched) {
          continue;
        }
        let deltaText = "";
        for (const line of cachedChunk.split("\n")) {
          const trimmedLine = line.trim();
          if (!trimmedLine || trimmedLine === "data: [DONE]") {
            continue;
          }
          const json = trimmedLine.replace("data: ", "");
          try {
            const obj = JSON.parse(json);
            const content = obj.choices[0].delta.content;
            if (content) deltaText = deltaText.concat(content);
          } catch (e) {
            // Incomplete/garbled SSE line — skip it; a later chunk may finish it.
          }
        }
        cachedChunk = "";
        answer = answer + deltaText;
        const variables = {
          id: messageBody.id,
          data: {
            content: answer.trim(),
          },
        };
        // Throttled partial-content update so Hasura isn't hit on every token.
        throttle(async () => {
          await fetch(env.HASURA_GRAPHQL_ENGINE_ENDPOINT + "/v1/graphql", {
            method: "POST",
            body: JSON.stringify({ query: updateMessageQuery, variables }),
            headers: {
              "Content-Type": "application/json",
              "x-hasura-admin-secret": env.HASURA_ADMIN_API_KEY,
            },
          })
            .catch((error) => {
              console.error(error);
            })
            .finally(() => console.log("++-- request sent"));
        }, 300)();
      }

      // Stream finished: mark the row ready and drop its prompt cache.
      // NOTE(review): a throttled update scheduled via setTimeout above may
      // still fire AFTER this final mutation and overwrite `content` — the
      // pending timeout is never cancelled here. TODO confirm/fix upstream.
      const variables = {
        id: messageBody.id,
        data: {
          status: "ready",
          prompt_cache: null,
        },
      };
      await fetch(env.HASURA_GRAPHQL_ENGINE_ENDPOINT + "/v1/graphql", {
        method: "POST",
        body: JSON.stringify({ query: updateMessageQuery, variables }),
        headers: {
          "Content-Type": "application/json",
          "x-hasura-admin-secret": env.HASURA_ADMIN_API_KEY,
        },
      }).catch((error) => {
        console.error(error);
      });

      // Mirror the final answer onto the conversation (last_text_message).
      const convUpdateVars = {
        id: requestBody.event.data.new.conversation_id,
        content: answer,
      };
      await fetch(env.HASURA_GRAPHQL_ENGINE_ENDPOINT + "/v1/graphql", {
        method: "POST",
        body: JSON.stringify({ query: updateConversationquery, variables: convUpdateVars }),
        headers: {
          "Content-Type": "application/json",
          "x-hasura-admin-secret": env.HASURA_ADMIN_API_KEY,
        },
      }).catch((error) => {
        console.error(error);
      });

      return new Response(JSON.stringify({ status: "success" }), {
        status: 200,
        statusText: "success",
      });
    });
}
// GraphQL mutation used for both the throttled partial-content updates and
// the final status/prompt_cache update ($data carries whichever fields change).
const updateMessageQuery = `
mutation chatCompletions($id: uuid = "", $data: messages_set_input) {
  update_messages_by_pk(pk_columns: {id: $id}, _set: $data) {
    id
    content
  }
}
`;
// GraphQL mutation that mirrors the finished answer onto the parent
// conversation's last_text_message (used once, after the stream completes).
// NOTE(review): lower-case "query" in the name is a typo, but renaming would
// break the reference in handleRequest — left as-is.
const updateConversationquery = `
mutation updateConversation($id: uuid = "", $content: String = "") {
  update_conversations_by_pk(pk_columns: {id: $id}, _set: {last_text_message: $content}) {
    id
  }
}
`; // FIX: trailing semicolon added for consistency with updateMessageQuery

View File

@ -1,11 +0,0 @@
# wrangler.toml — local dev configuration for the Hasura-events worker.
# NOTE(review): "cloudlfare" is a typo for "cloudflare"; the worker name is
# only cosmetic locally, but verify nothing references it before renaming.
name = "cloudlfare_worker"
main = "worker.ts"
compatibility_date = "2023-06-08"
workers_dev = true

# Values below match the docker-compose network (graphql-engine / llm hosts);
# secrets are blank here and expected to be filled per environment.
[vars]
HASURA_GRAPHQL_ENGINE_ENDPOINT = "http://graphql-engine:8080"
HASURA_ADMIN_API_KEY = "myadminsecretkey"
LLM_INFERENCE_ENDPOINT="http://llm:8000/v1/chat/completions"
INFERENCE_API_KEY=""
PROJECT_ID = ""

View File

@ -5,6 +5,7 @@ NEXT_PUBLIC_DOWNLOAD_APP_IOS=#
NEXT_PUBLIC_DOWNLOAD_APP_ANDROID=# NEXT_PUBLIC_DOWNLOAD_APP_ANDROID=#
NEXT_PUBLIC_GRAPHQL_ENGINE_URL=http://localhost:8080/v1/graphql NEXT_PUBLIC_GRAPHQL_ENGINE_URL=http://localhost:8080/v1/graphql
NEXT_PUBLIC_GRAPHQL_ENGINE_WEB_SOCKET_URL=ws://localhost:8080/v1/graphql NEXT_PUBLIC_GRAPHQL_ENGINE_WEB_SOCKET_URL=ws://localhost:8080/v1/graphql
NEXT_PUBLIC_OPENAPI_ENDPOINT=http://localhost:8000/v1/completions
KEYCLOAK_CLIENT_ID=hasura KEYCLOAK_CLIENT_ID=hasura
KEYCLOAK_CLIENT_SECRET=oMtCPAV7diKpE564SBspgKj4HqlKM4Hy KEYCLOAK_CLIENT_SECRET=oMtCPAV7diKpE564SBspgKj4HqlKM4Hy
AUTH_ISSUER=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID AUTH_ISSUER=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID
@ -12,4 +13,3 @@ NEXTAUTH_URL=http://localhost:3000
NEXTAUTH_SECRET=my-secret NEXTAUTH_SECRET=my-secret
END_SESSION_URL=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID/protocol/openid-connect/logout END_SESSION_URL=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID/protocol/openid-connect/logout
REFRESH_TOKEN_URL=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID/protocol/openid-connect/token REFRESH_TOKEN_URL=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID/protocol/openid-connect/token
HASURA_ADMIN_TOKEN=myadminsecretkey

View File

@ -68,21 +68,6 @@ services:
jan_community: jan_community:
ipv4_address: 172.20.0.12 ipv4_address: 172.20.0.12
worker:
build:
context: ./app-backend/worker
dockerfile: ./Dockerfile
restart: always
environment:
- "NODE_ENV=development"
volumes:
- ./app-backend/worker:/worker
ports:
- 8787:8787
networks:
jan_community:
ipv4_address: 172.20.0.13
data-connector-agent: data-connector-agent:
image: hasura/graphql-data-connector:v2.31.0 image: hasura/graphql-data-connector:v2.31.0
restart: always restart: always

View File

@ -33,8 +33,13 @@ For using our complete solution, check [this](https://github.com/janhq/jan)
``` ```
yarn dev yarn dev
``` ```
4. **Regenerate GraphQL:**
```
HASURA_ADMIN_TOKEN="[hasura_admin_secret_key]" yarn generate
```
5. **Access Jan Web:**
Open your web browser and navigate to `http://localhost:3000` to access the Jan Web application. Open your web browser and navigate to `http://localhost:3000` to access the Jan Web application.

View File

@ -128,7 +128,6 @@ export const ChatBody: React.FC<Props> = observer(({ onPromptSelected }) => {
const renderItem = ( const renderItem = (
index: number, index: number,
{ {
id,
messageType, messageType,
senderAvatarUrl, senderAvatarUrl,
senderName, senderName,
@ -173,11 +172,9 @@ const renderItem = (
) : ( ) : (
<StreamTextMessage <StreamTextMessage
key={index} key={index}
id={id}
avatarUrl={senderAvatarUrl ?? "/icons/app_icon.svg"} avatarUrl={senderAvatarUrl ?? "/icons/app_icon.svg"}
senderName={senderName} senderName={senderName}
createdAt={createdAt} createdAt={createdAt}
text={text}
/> />
); );
default: default:

View File

@ -1,13 +1,9 @@
import React, { useEffect } from "react"; import React from "react";
import { displayDate } from "@/_utils/datetime"; import { displayDate } from "@/_utils/datetime";
import { TextCode } from "../TextCode";
import { getMessageCode } from "@/_utils/message";
import { useSubscription } from "@apollo/client";
import {
SubscribeMessageDocument,
SubscribeMessageSubscription,
} from "@/graphql";
import { useStore } from "@/_models/RootStore"; import { useStore } from "@/_models/RootStore";
import { StreamingText, StreamingTextURL, useTextBuffer } from "nextjs-openai";
import { MessageSenderType } from "@/_models/ChatMessage";
import { Role } from "@/_models/History";
type Props = { type Props = {
id?: string; id?: string;
@ -18,54 +14,52 @@ type Props = {
}; };
const StreamTextMessage: React.FC<Props> = ({ const StreamTextMessage: React.FC<Props> = ({
id,
senderName, senderName,
createdAt, createdAt,
avatarUrl = "", avatarUrl = "",
text = "",
}) => { }) => {
const [textMessage, setTextMessage] = React.useState(text); const [data, setData] = React.useState<any | undefined>();
const [completedTyping, setCompletedTyping] = React.useState(false);
const tokenIndex = React.useRef(0);
const { historyStore } = useStore(); const { historyStore } = useStore();
const { data } = useSubscription<SubscribeMessageSubscription>( const conversation = historyStore?.getActiveConversation();
SubscribeMessageDocument,
{ React.useEffect(() => {
variables: { const messages = conversation?.chatMessages.slice(-5).map((e) => ({
id, role:
e.messageSenderType === MessageSenderType.User
? Role.User
: Role.Assistant,
content: e.text,
}));
setData({
messages,
stream: true,
model: "gpt-3.5-turbo",
max_tokens: 500,
});
}, [conversation]);
const { buffer, refresh, cancel } = useTextBuffer({
url: `${process.env.NEXT_PUBLIC_OPENAPI_ENDPOINT}`,
throttle: 100,
data,
options: {
headers: {
"Content-Type": "application/json",
}, },
},
});
const parsedBuffer = (buffer: String) => {
try {
const json = buffer.replace("data: ", "");
return JSON.parse(json).choices[0].text;
} catch (e) {
return "";
} }
); };
useEffect(() => { return data ? (
if (
data?.messages_by_pk?.content &&
data.messages_by_pk.content.length > text.length
) {
historyStore.finishActiveConversationWaiting();
}
}, [data, text]); // eslint-disable-line react-hooks/exhaustive-deps
useEffect(() => {
setCompletedTyping(false);
const stringResponse = data?.messages_by_pk?.content ?? text;
const intervalId = setInterval(() => {
setTextMessage(stringResponse.slice(0, tokenIndex.current));
tokenIndex.current++;
if (tokenIndex.current > stringResponse.length) {
clearInterval(intervalId);
setCompletedTyping(true);
}
}, 20);
return () => clearInterval(intervalId);
}, [data?.messages_by_pk?.content, text]);
return textMessage.length > 0 ? (
<div className="flex items-start gap-2"> <div className="flex items-start gap-2">
<img <img
className="rounded-full" className="rounded-full"
@ -83,20 +77,11 @@ const StreamTextMessage: React.FC<Props> = ({
{displayDate(createdAt)} {displayDate(createdAt)}
</div> </div>
</div> </div>
{textMessage.includes("```") ? ( <div className="leading-[20px] whitespace-break-spaces text-[14px] font-normal dark:text-[#d1d5db]">
getMessageCode(textMessage).map((item, i) => ( <StreamingText
<div className="flex gap-1 flex-col" key={i}> buffer={buffer.map((b) => parsedBuffer(b))}
<p className="leading-[20px] whitespace-break-spaces text-[14px] font-normal dark:text-[#d1d5db]"> ></StreamingText>
{item.text} </div>
</p>
{item.code.trim().length > 0 && <TextCode text={item.code} />}
</div>
))
) : (
<p className="leading-[20px] whitespace-break-spaces text-[14px] font-normal dark:text-[#d1d5db]">
{textMessage}
</p>
)}
</div> </div>
</div> </div>
) : ( ) : (

View File

@ -8,7 +8,6 @@
"start": "next start", "start": "next start",
"lint": "next lint", "lint": "next lint",
"compile": "tsc --noEmit -p . --pretty", "compile": "tsc --noEmit -p . --pretty",
"configure": "op read op://Shared/WebDevelopmentEnv/.env > .env && op read op://Shared/WebDevelopmentFirebaseConfig/firebase_configs.json > app/_services/firebase/firebase_configs.json",
"generate": "graphql-codegen --config codegen.ts", "generate": "graphql-codegen --config codegen.ts",
"watch": "graphql-codegen -w" "watch": "graphql-codegen -w"
}, },
@ -36,6 +35,8 @@
"next": "13.4.10", "next": "13.4.10",
"next-auth": "^4.23.1", "next-auth": "^4.23.1",
"next-themes": "^0.2.1", "next-themes": "^0.2.1",
"nextjs-openai": "^7.2.0",
"openai-streams": "^6.2.0",
"postcss": "8.4.26", "postcss": "8.4.26",
"react": "18.2.0", "react": "18.2.0",
"react-dom": "18.2.0", "react-dom": "18.2.0",