diff --git a/web-app/src/hooks/useAppState.ts b/web-app/src/hooks/useAppState.ts index eade55af5..5876daefb 100644 --- a/web-app/src/hooks/useAppState.ts +++ b/web-app/src/hooks/useAppState.ts @@ -21,7 +21,7 @@ type AppState = { updateLoadingModel: (loading: boolean) => void updateTools: (tools: MCPTool[]) => void setAbortController: (threadId: string, controller: AbortController) => void - updateTokenSpeed: (message: ThreadMessage) => void + updateTokenSpeed: (message: ThreadMessage, increment?: number) => void resetTokenSpeed: () => void setOutOfContextDialog: (show: boolean) => void } @@ -74,7 +74,7 @@ export const useAppState = create()((set) => ({ }, })) }, - updateTokenSpeed: (message) => + updateTokenSpeed: (message, increment = 1) => set((state) => { const currentTimestamp = new Date().getTime() // Get current time in milliseconds if (!state.tokenSpeed) { @@ -83,7 +83,7 @@ export const useAppState = create()((set) => ({ tokenSpeed: { lastTimestamp: currentTimestamp, tokenSpeed: 0, - tokenCount: 1, + tokenCount: increment, message: message.id, }, } @@ -91,7 +91,7 @@ export const useAppState = create()((set) => ({ const timeDiffInSeconds = (currentTimestamp - state.tokenSpeed.lastTimestamp) / 1000 // Time difference in seconds - const totalTokenCount = state.tokenSpeed.tokenCount + 1 + const totalTokenCount = state.tokenSpeed.tokenCount + increment const averageTokenSpeed = totalTokenCount / (timeDiffInSeconds > 0 ? timeDiffInSeconds : 1) // Calculate average token speed return { diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts index 09c0473c6..3300d1ba4 100644 --- a/web-app/src/hooks/useChat.ts +++ b/web-app/src/hooks/useChat.ts @@ -311,6 +311,66 @@ export const useChat = () => { toolCalls.push(...completion.choices[0].message.tool_calls) } } else { + // High-throughput scheduler: batch UI updates on rAF (requestAnimationFrame) + let rafScheduled = false + let rafHandle: number | undefined + let pendingDeltaCount = 0 + const scheduleFlush = () => { + if (rafScheduled) return + rafScheduled = true + const doSchedule = (cb: () => void) => { + if (typeof requestAnimationFrame !== 'undefined') { + rafHandle = requestAnimationFrame(() => cb()) + } else { + // Fallback for non-browser test environments + const t = setTimeout(() => cb(), 0) as unknown as number + rafHandle = t + } + } + doSchedule(() => { + const currentContent = newAssistantThreadContent( + activeThread.id, + accumulatedText, + { + tool_calls: toolCalls.map((e) => ({ + ...e, + state: 'pending', + })), + } + ) + updateStreamingContent(currentContent) + if (pendingDeltaCount > 0) { + updateTokenSpeed(currentContent, pendingDeltaCount) + } + pendingDeltaCount = 0 + rafScheduled = false + }) + } + const flushIfPending = () => { + if (!rafScheduled) return + if (typeof cancelAnimationFrame !== 'undefined' && rafHandle !== undefined) { + cancelAnimationFrame(rafHandle) + } else if (rafHandle !== undefined) { + clearTimeout(rafHandle) + } + // Do an immediate flush + const currentContent = newAssistantThreadContent( + activeThread.id, + accumulatedText, + { + tool_calls: toolCalls.map((e) => ({ + ...e, + state: 'pending', + })), + } + ) + updateStreamingContent(currentContent) + if (pendingDeltaCount > 0) { + updateTokenSpeed(currentContent, pendingDeltaCount) + } + pendingDeltaCount = 0 + rafScheduled = false + } for await (const part of completion) { // Error message if (!part.choices) { @@ -323,39 +383,19 @@ export const useChat = () => { const delta = part.choices[0]?.delta?.content || '' if (part.choices[0]?.delta?.tool_calls) { - const calls = extractToolCall(part, currentCall, toolCalls) - const currentContent = newAssistantThreadContent( - activeThread.id, - accumulatedText, - { - tool_calls: calls.map((e) => ({ - ...e, - state: 'pending', - })), - } - ) - updateStreamingContent(currentContent) - await new Promise((resolve) => setTimeout(resolve, 0)) + extractToolCall(part, currentCall, toolCalls) + // Schedule a flush to reflect tool update + scheduleFlush() } if (delta) { accumulatedText += delta - // Create a new object each time to avoid reference issues - // Use a timeout to prevent React from batching updates too quickly - const currentContent = newAssistantThreadContent( - activeThread.id, - accumulatedText, - { - tool_calls: toolCalls.map((e) => ({ - ...e, - state: 'pending', - })), - } - ) - updateStreamingContent(currentContent) - updateTokenSpeed(currentContent) - await new Promise((resolve) => setTimeout(resolve, 0)) + pendingDeltaCount += 1 + // Batch UI update on next animation frame + scheduleFlush() } } + // Ensure any pending buffered content is rendered at the end + flushIfPending() } } catch (error) { const errorMessage =