Fix issue #6199: Jan UI bottlenecks token rendering speed to ~300 TPS despite faster Cerebras API output
This commit is contained in:
parent
56fa4f9677
commit
4ba56f1377
@ -21,7 +21,7 @@ type AppState = {
|
||||
updateLoadingModel: (loading: boolean) => void
|
||||
updateTools: (tools: MCPTool[]) => void
|
||||
setAbortController: (threadId: string, controller: AbortController) => void
|
||||
updateTokenSpeed: (message: ThreadMessage) => void
|
||||
updateTokenSpeed: (message: ThreadMessage, increment?: number) => void
|
||||
resetTokenSpeed: () => void
|
||||
setOutOfContextDialog: (show: boolean) => void
|
||||
}
|
||||
@ -74,7 +74,7 @@ export const useAppState = create<AppState>()((set) => ({
|
||||
},
|
||||
}))
|
||||
},
|
||||
updateTokenSpeed: (message) =>
|
||||
updateTokenSpeed: (message, increment = 1) =>
|
||||
set((state) => {
|
||||
const currentTimestamp = new Date().getTime() // Get current time in milliseconds
|
||||
if (!state.tokenSpeed) {
|
||||
@ -83,7 +83,7 @@ export const useAppState = create<AppState>()((set) => ({
|
||||
tokenSpeed: {
|
||||
lastTimestamp: currentTimestamp,
|
||||
tokenSpeed: 0,
|
||||
tokenCount: 1,
|
||||
tokenCount: increment,
|
||||
message: message.id,
|
||||
},
|
||||
}
|
||||
@ -91,7 +91,7 @@ export const useAppState = create<AppState>()((set) => ({
|
||||
|
||||
const timeDiffInSeconds =
|
||||
(currentTimestamp - state.tokenSpeed.lastTimestamp) / 1000 // Time difference in seconds
|
||||
const totalTokenCount = state.tokenSpeed.tokenCount + 1
|
||||
const totalTokenCount = state.tokenSpeed.tokenCount + increment
|
||||
const averageTokenSpeed =
|
||||
totalTokenCount / (timeDiffInSeconds > 0 ? timeDiffInSeconds : 1) // Calculate average token speed
|
||||
return {
|
||||
|
||||
@ -311,6 +311,66 @@ export const useChat = () => {
|
||||
toolCalls.push(...completion.choices[0].message.tool_calls)
|
||||
}
|
||||
} else {
|
||||
// High-throughput scheduler: batch UI updates on rAF (requestAnimationFrame)
|
||||
let rafScheduled = false
|
||||
let rafHandle: number | undefined
|
||||
let pendingDeltaCount = 0
|
||||
const scheduleFlush = () => {
|
||||
if (rafScheduled) return
|
||||
rafScheduled = true
|
||||
const doSchedule = (cb: () => void) => {
|
||||
if (typeof requestAnimationFrame !== 'undefined') {
|
||||
rafHandle = requestAnimationFrame(() => cb())
|
||||
} else {
|
||||
// Fallback for non-browser test environments
|
||||
const t = setTimeout(() => cb(), 0) as unknown as number
|
||||
rafHandle = t
|
||||
}
|
||||
}
|
||||
doSchedule(() => {
|
||||
const currentContent = newAssistantThreadContent(
|
||||
activeThread.id,
|
||||
accumulatedText,
|
||||
{
|
||||
tool_calls: toolCalls.map((e) => ({
|
||||
...e,
|
||||
state: 'pending',
|
||||
})),
|
||||
}
|
||||
)
|
||||
updateStreamingContent(currentContent)
|
||||
if (pendingDeltaCount > 0) {
|
||||
updateTokenSpeed(currentContent, pendingDeltaCount)
|
||||
}
|
||||
pendingDeltaCount = 0
|
||||
rafScheduled = false
|
||||
})
|
||||
}
|
||||
const flushIfPending = () => {
|
||||
if (!rafScheduled) return
|
||||
if (typeof cancelAnimationFrame !== 'undefined' && rafHandle !== undefined) {
|
||||
cancelAnimationFrame(rafHandle)
|
||||
} else if (rafHandle !== undefined) {
|
||||
clearTimeout(rafHandle)
|
||||
}
|
||||
// Do an immediate flush
|
||||
const currentContent = newAssistantThreadContent(
|
||||
activeThread.id,
|
||||
accumulatedText,
|
||||
{
|
||||
tool_calls: toolCalls.map((e) => ({
|
||||
...e,
|
||||
state: 'pending',
|
||||
})),
|
||||
}
|
||||
)
|
||||
updateStreamingContent(currentContent)
|
||||
if (pendingDeltaCount > 0) {
|
||||
updateTokenSpeed(currentContent, pendingDeltaCount)
|
||||
}
|
||||
pendingDeltaCount = 0
|
||||
rafScheduled = false
|
||||
}
|
||||
for await (const part of completion) {
|
||||
// Error message
|
||||
if (!part.choices) {
|
||||
@ -323,39 +383,19 @@ export const useChat = () => {
|
||||
const delta = part.choices[0]?.delta?.content || ''
|
||||
|
||||
if (part.choices[0]?.delta?.tool_calls) {
|
||||
const calls = extractToolCall(part, currentCall, toolCalls)
|
||||
const currentContent = newAssistantThreadContent(
|
||||
activeThread.id,
|
||||
accumulatedText,
|
||||
{
|
||||
tool_calls: calls.map((e) => ({
|
||||
...e,
|
||||
state: 'pending',
|
||||
})),
|
||||
}
|
||||
)
|
||||
updateStreamingContent(currentContent)
|
||||
await new Promise((resolve) => setTimeout(resolve, 0))
|
||||
extractToolCall(part, currentCall, toolCalls)
|
||||
// Schedule a flush to reflect tool update
|
||||
scheduleFlush()
|
||||
}
|
||||
if (delta) {
|
||||
accumulatedText += delta
|
||||
// Create a new object each time to avoid reference issues
|
||||
// Use a timeout to prevent React from batching updates too quickly
|
||||
const currentContent = newAssistantThreadContent(
|
||||
activeThread.id,
|
||||
accumulatedText,
|
||||
{
|
||||
tool_calls: toolCalls.map((e) => ({
|
||||
...e,
|
||||
state: 'pending',
|
||||
})),
|
||||
}
|
||||
)
|
||||
updateStreamingContent(currentContent)
|
||||
updateTokenSpeed(currentContent)
|
||||
await new Promise((resolve) => setTimeout(resolve, 0))
|
||||
pendingDeltaCount += 1
|
||||
// Batch UI update on next animation frame
|
||||
scheduleFlush()
|
||||
}
|
||||
}
|
||||
// Ensure any pending buffered content is rendered at the end
|
||||
flushIfPending()
|
||||
}
|
||||
} catch (error) {
|
||||
const errorMessage =
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user