Fix issue #6199: Jan UI bottlenecks token rendering speed to ~300 TPS despite faster Cerebras API output
This commit is contained in:
parent
56fa4f9677
commit
4ba56f1377
@ -21,7 +21,7 @@ type AppState = {
|
||||
updateLoadingModel: (loading: boolean) => void
|
||||
updateTools: (tools: MCPTool[]) => void
|
||||
setAbortController: (threadId: string, controller: AbortController) => void
|
||||
updateTokenSpeed: (message: ThreadMessage) => void
|
||||
updateTokenSpeed: (message: ThreadMessage, increment?: number) => void
|
||||
resetTokenSpeed: () => void
|
||||
setOutOfContextDialog: (show: boolean) => void
|
||||
}
|
||||
@ -74,7 +74,7 @@ export const useAppState = create<AppState>()((set) => ({
|
||||
},
|
||||
}))
|
||||
},
|
||||
updateTokenSpeed: (message) =>
|
||||
updateTokenSpeed: (message, increment = 1) =>
|
||||
set((state) => {
|
||||
const currentTimestamp = new Date().getTime() // Get current time in milliseconds
|
||||
if (!state.tokenSpeed) {
|
||||
@ -83,7 +83,7 @@ export const useAppState = create<AppState>()((set) => ({
|
||||
tokenSpeed: {
|
||||
lastTimestamp: currentTimestamp,
|
||||
tokenSpeed: 0,
|
||||
tokenCount: 1,
|
||||
tokenCount: increment,
|
||||
message: message.id,
|
||||
},
|
||||
}
|
||||
@ -91,7 +91,7 @@ export const useAppState = create<AppState>()((set) => ({
|
||||
|
||||
const timeDiffInSeconds =
|
||||
(currentTimestamp - state.tokenSpeed.lastTimestamp) / 1000 // Time difference in seconds
|
||||
const totalTokenCount = state.tokenSpeed.tokenCount + 1
|
||||
const totalTokenCount = state.tokenSpeed.tokenCount + increment
|
||||
const averageTokenSpeed =
|
||||
totalTokenCount / (timeDiffInSeconds > 0 ? timeDiffInSeconds : 1) // Calculate average token speed
|
||||
return {
|
||||
|
||||
@ -311,6 +311,66 @@ export const useChat = () => {
|
||||
toolCalls.push(...completion.choices[0].message.tool_calls)
|
||||
}
|
||||
} else {
|
||||
// High-throughput scheduler: batch UI updates on rAF (requestAnimationFrame)
|
||||
let rafScheduled = false
|
||||
let rafHandle: number | undefined
|
||||
let pendingDeltaCount = 0
|
||||
const scheduleFlush = () => {
|
||||
if (rafScheduled) return
|
||||
rafScheduled = true
|
||||
const doSchedule = (cb: () => void) => {
|
||||
if (typeof requestAnimationFrame !== 'undefined') {
|
||||
rafHandle = requestAnimationFrame(() => cb())
|
||||
} else {
|
||||
// Fallback for non-browser test environments
|
||||
const t = setTimeout(() => cb(), 0) as unknown as number
|
||||
rafHandle = t
|
||||
}
|
||||
}
|
||||
doSchedule(() => {
|
||||
const currentContent = newAssistantThreadContent(
|
||||
activeThread.id,
|
||||
accumulatedText,
|
||||
{
|
||||
tool_calls: toolCalls.map((e) => ({
|
||||
...e,
|
||||
state: 'pending',
|
||||
})),
|
||||
}
|
||||
)
|
||||
updateStreamingContent(currentContent)
|
||||
if (pendingDeltaCount > 0) {
|
||||
updateTokenSpeed(currentContent, pendingDeltaCount)
|
||||
}
|
||||
pendingDeltaCount = 0
|
||||
rafScheduled = false
|
||||
})
|
||||
}
|
||||
const flushIfPending = () => {
|
||||
if (!rafScheduled) return
|
||||
if (typeof cancelAnimationFrame !== 'undefined' && rafHandle !== undefined) {
|
||||
cancelAnimationFrame(rafHandle)
|
||||
} else if (rafHandle !== undefined) {
|
||||
clearTimeout(rafHandle)
|
||||
}
|
||||
// Do an immediate flush
|
||||
const currentContent = newAssistantThreadContent(
|
||||
activeThread.id,
|
||||
accumulatedText,
|
||||
{
|
||||
tool_calls: toolCalls.map((e) => ({
|
||||
...e,
|
||||
state: 'pending',
|
||||
})),
|
||||
}
|
||||
)
|
||||
updateStreamingContent(currentContent)
|
||||
if (pendingDeltaCount > 0) {
|
||||
updateTokenSpeed(currentContent, pendingDeltaCount)
|
||||
}
|
||||
pendingDeltaCount = 0
|
||||
rafScheduled = false
|
||||
}
|
||||
for await (const part of completion) {
|
||||
// Error message
|
||||
if (!part.choices) {
|
||||
@ -323,39 +383,19 @@ export const useChat = () => {
|
||||
const delta = part.choices[0]?.delta?.content || ''
|
||||
|
||||
if (part.choices[0]?.delta?.tool_calls) {
|
||||
const calls = extractToolCall(part, currentCall, toolCalls)
|
||||
const currentContent = newAssistantThreadContent(
|
||||
activeThread.id,
|
||||
accumulatedText,
|
||||
{
|
||||
tool_calls: calls.map((e) => ({
|
||||
...e,
|
||||
state: 'pending',
|
||||
})),
|
||||
}
|
||||
)
|
||||
updateStreamingContent(currentContent)
|
||||
await new Promise((resolve) => setTimeout(resolve, 0))
|
||||
extractToolCall(part, currentCall, toolCalls)
|
||||
// Schedule a flush to reflect tool update
|
||||
scheduleFlush()
|
||||
}
|
||||
if (delta) {
|
||||
accumulatedText += delta
|
||||
// Create a new object each time to avoid reference issues
|
||||
// Use a timeout to prevent React from batching updates too quickly
|
||||
const currentContent = newAssistantThreadContent(
|
||||
activeThread.id,
|
||||
accumulatedText,
|
||||
{
|
||||
tool_calls: toolCalls.map((e) => ({
|
||||
...e,
|
||||
state: 'pending',
|
||||
})),
|
||||
}
|
||||
)
|
||||
updateStreamingContent(currentContent)
|
||||
updateTokenSpeed(currentContent)
|
||||
await new Promise((resolve) => setTimeout(resolve, 0))
|
||||
pendingDeltaCount += 1
|
||||
// Batch UI update on next animation frame
|
||||
scheduleFlush()
|
||||
}
|
||||
}
|
||||
// Ensure any pending buffered content is rendered at the end
|
||||
flushIfPending()
|
||||
}
|
||||
} catch (error) {
|
||||
const errorMessage =
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user