From 157ecacb207fc8d85210b8e709c9eea29c6e6f87 Mon Sep 17 00:00:00 2001 From: Louis Date: Wed, 1 Oct 2025 14:19:21 +0700 Subject: [PATCH] fix: chat completion usage - token speed (#6675) --- web-app/src/hooks/useAppState.ts | 16 +++++++++++++++ web-app/src/hooks/useChat.ts | 34 +++++++++++++++++++++++++++++--- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/web-app/src/hooks/useAppState.ts b/web-app/src/hooks/useAppState.ts index 59e2e6dda..646294a8d 100644 --- a/web-app/src/hooks/useAppState.ts +++ b/web-app/src/hooks/useAppState.ts @@ -38,6 +38,11 @@ type AppState = { updateTools: (tools: MCPTool[]) => void setAbortController: (threadId: string, controller: AbortController) => void updateTokenSpeed: (message: ThreadMessage, increment?: number) => void + setTokenSpeed: ( + message: ThreadMessage, + speed: number, + completionTokens: number + ) => void resetTokenSpeed: () => void clearAppState: () => void setOutOfContextDialog: (show: boolean) => void @@ -96,6 +101,17 @@ export const useAppState = create()((set) => ({ }, })) }, + setTokenSpeed: (message, speed, completionTokens) => { + set((state) => ({ + tokenSpeed: { + ...state.tokenSpeed, + lastTimestamp: new Date().getTime(), + tokenSpeed: speed, + tokenCount: completionTokens, + message: message.id, + }, + })) + }, updateTokenSpeed: (message, increment = 1) => set((state) => { const currentTimestamp = new Date().getTime() // Get current time in milliseconds diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts index 935458326..d17f87ab2 100644 --- a/web-app/src/hooks/useChat.ts +++ b/web-app/src/hooks/useChat.ts @@ -19,7 +19,10 @@ import { } from '@/lib/completion' import { CompletionMessagesBuilder } from '@/lib/messages' import { renderInstructions } from '@/lib/instructionTemplate' -import { ChatCompletionMessageToolCall } from 'openai/resources' +import { + ChatCompletionMessageToolCall, + CompletionUsage, +} from 'openai/resources' import { useServiceHub } from '@/hooks/useServiceHub' import { useToolApproval } from '@/hooks/useToolApproval' @@ -42,6 +45,7 @@ export const useChat = () => { updateStreamingContent, updateLoadingModel, setAbortController, + setTokenSpeed, ] = useAppState( useShallow((state) => [ state.updateTokenSpeed, @@ -49,6 +53,7 @@ export const useChat = () => { state.updateStreamingContent, state.updateLoadingModel, state.setAbortController, + state.setTokenSpeed, ]) ) const updatePromptProgress = useAppState( @@ -333,6 +338,8 @@ export const useChat = () => { let accumulatedText = '' const currentCall: ChatCompletionMessageToolCall | null = null const toolCalls: ChatCompletionMessageToolCall[] = [] + const timeToFirstToken = Date.now() + let tokenUsage: CompletionUsage | undefined = undefined try { if (isCompletionResponse(completion)) { const message = completion.choices[0]?.message @@ -348,6 +355,9 @@ export const useChat = () => { if (message?.tool_calls) { toolCalls.push(...message.tool_calls) } + if ('usage' in completion) { + tokenUsage = completion.usage + } } else { // High-throughput scheduler: batch UI updates on rAF (requestAnimationFrame) let rafScheduled = false @@ -384,7 +394,14 @@ export const useChat = () => { } ) updateStreamingContent(currentContent) - if (pendingDeltaCount > 0) { + if (tokenUsage) { + setTokenSpeed( + currentContent, + tokenUsage.completion_tokens / + Math.max((Date.now() - timeToFirstToken) / 1000, 1), + tokenUsage.completion_tokens + ) + } else if (pendingDeltaCount > 0) { updateTokenSpeed(currentContent, pendingDeltaCount) } pendingDeltaCount = 0 @@ -413,7 +430,14 @@ export const useChat = () => { } ) updateStreamingContent(currentContent) - if (pendingDeltaCount > 0) { + if (tokenUsage) { + setTokenSpeed( + currentContent, + tokenUsage.completion_tokens / + Math.max((Date.now() - timeToFirstToken) / 1000, 1), + tokenUsage.completion_tokens + ) + } else if (pendingDeltaCount > 0) { updateTokenSpeed(currentContent, pendingDeltaCount) } pendingDeltaCount = 0 @@ -445,6 +469,10 @@ export const useChat = () => { ) } + if ('usage' in part && part.usage) { + tokenUsage = part.usage + } + if (part.choices[0]?.delta?.tool_calls) { extractToolCall(part, currentCall, toolCalls) // Schedule a flush to reflect tool update