Fix Issue #6199

Fix Issue: Jan UI Bottlenecks Token Rendering Speed to ~300 TPS Despite Faster Cerebras API Output
2025-08-15 15:00:29 -07:00 · 2025-08-15 15:00:29 -07:00 · 4ba56f1377
commit 4ba56f1377
parent 56fa4f9677
2 changed files with 72 additions and 32 deletions
--- a/web-app/src/hooks/useAppState.ts
+++ b/web-app/src/hooks/useAppState.ts
@ -21,7 +21,7 @@ type AppState = {
  updateLoadingModel: (loading: boolean) => void
  updateTools: (tools: MCPTool[]) => void
  setAbortController: (threadId: string, controller: AbortController) => void
-  updateTokenSpeed: (message: ThreadMessage) => void
+  updateTokenSpeed: (message: ThreadMessage, increment?: number) => void
  resetTokenSpeed: () => void
  setOutOfContextDialog: (show: boolean) => void
 }
@ -74,7 +74,7 @@ export const useAppState = create<AppState>()((set) => ({
      },
    }))
  },
-  updateTokenSpeed: (message) =>
+  updateTokenSpeed: (message, increment = 1) =>
    set((state) => {
      const currentTimestamp = new Date().getTime() // Get current time in milliseconds
      if (!state.tokenSpeed) {
@ -83,7 +83,7 @@ export const useAppState = create<AppState>()((set) => ({
          tokenSpeed: {
            lastTimestamp: currentTimestamp,
            tokenSpeed: 0,
-            tokenCount: 1,
+            tokenCount: increment,
            message: message.id,
          },
        }
@ -91,7 +91,7 @@ export const useAppState = create<AppState>()((set) => ({
      const timeDiffInSeconds =
        (currentTimestamp - state.tokenSpeed.lastTimestamp) / 1000 // Time difference in seconds
-      const totalTokenCount = state.tokenSpeed.tokenCount + 1
+      const totalTokenCount = state.tokenSpeed.tokenCount + increment
      const averageTokenSpeed =
        totalTokenCount / (timeDiffInSeconds > 0 ? timeDiffInSeconds : 1) // Calculate average token speed
      return {
--- a/web-app/src/hooks/useChat.ts
+++ b/web-app/src/hooks/useChat.ts
@ -311,6 +311,66 @@ export const useChat = () => {
                toolCalls.push(...completion.choices[0].message.tool_calls)
              }
            } else {
              // High-throughput scheduler: coalesce streamed deltas so the UI is
              // updated at most once per animation frame (requestAnimationFrame).
              let rafScheduled = false
              // Cancels the currently scheduled flush with the API that matches
              // the one it was scheduled with (rAF vs. setTimeout).
              let cancelScheduledFlush: (() => void) | undefined
              // Content deltas received since the last flush; the streaming loop
              // increments this and each flush reports it to updateTokenSpeed.
              let pendingDeltaCount = 0

              /**
               * Pushes the text and tool calls accumulated so far to the
               * streaming UI state and reports the buffered delta count.
               * Shared by the scheduled flush and the immediate flush so both
               * paths stay identical.
               */
              const flushNow = () => {
                // Clear the scheduling state first: if an update below throws,
                // later deltas must still be able to schedule a new flush.
                rafScheduled = false
                cancelScheduledFlush = undefined
                const currentContent = newAssistantThreadContent(
                  activeThread.id,
                  accumulatedText,
                  {
                    tool_calls: toolCalls.map((e) => ({
                      ...e,
                      state: 'pending',
                    })),
                  }
                )
                updateStreamingContent(currentContent)
                if (pendingDeltaCount > 0) {
                  updateTokenSpeed(currentContent, pendingDeltaCount)
                }
                pendingDeltaCount = 0
              }

              /**
               * Requests a flush on the next animation frame. Calls made while
               * a flush is already scheduled are no-ops, so any number of
               * deltas arriving within one frame produce a single UI update.
               */
              const scheduleFlush = () => {
                if (rafScheduled) return
                rafScheduled = true
                if (typeof requestAnimationFrame !== 'undefined') {
                  const handle = requestAnimationFrame(() => flushNow())
                  cancelScheduledFlush = () => {
                    if (typeof cancelAnimationFrame !== 'undefined') {
                      cancelAnimationFrame(handle)
                    }
                  }
                } else {
                  // Fallback for non-browser test environments
                  const handle = setTimeout(() => flushNow(), 0)
                  cancelScheduledFlush = () => clearTimeout(handle)
                }
              }

              /**
               * If a flush is scheduled, cancels it and flushes synchronously.
               * Does nothing when no flush is pending.
               */
              const flushIfPending = () => {
                if (!rafScheduled) return
                cancelScheduledFlush?.()
                // Do an immediate flush
                flushNow()
              }
              for await (const part of completion) {
                // Error message
                if (!part.choices) {
@ -323,39 +383,19 @@ export const useChat = () => {
                const delta = part.choices[0]?.delta?.content || ''
                if (part.choices[0]?.delta?.tool_calls) {
-                  const calls = extractToolCall(part, currentCall, toolCalls)
+                  extractToolCall(part, currentCall, toolCalls)
-                  const currentContent = newAssistantThreadContent(
+                  // Schedule a flush to reflect tool update
-                    activeThread.id,
+                  scheduleFlush()
                    accumulatedText,
                    {
                      tool_calls: calls.map((e) => ({
                        ...e,
                        state: 'pending',
                      })),
                    }
                  )
                  updateStreamingContent(currentContent)
                  await new Promise((resolve) => setTimeout(resolve, 0))
                }
                if (delta) {
                  accumulatedText += delta
-                  // Create a new object each time to avoid reference issues
+                  pendingDeltaCount += 1
-                  // Use a timeout to prevent React from batching updates too quickly
+                  // Batch UI update on next animation frame
-                  const currentContent = newAssistantThreadContent(
+                  scheduleFlush()
                    activeThread.id,
                    accumulatedText,
                    {
                      tool_calls: toolCalls.map((e) => ({
                        ...e,
                        state: 'pending',
                      })),
                    }
                  )
                  updateStreamingContent(currentContent)
                  updateTokenSpeed(currentContent)
                  await new Promise((resolve) => setTimeout(resolve, 0))
                }
              }
              // Ensure any pending buffered content is rendered at the end
              flushIfPending()
            }
          } catch (error) {
            const errorMessage =