fix: chat completion usage - token speed (#6675)

Louis 2025-10-01 14:19:21 +07:00 committed by GitHub
parent 0de5f17071
commit 157ecacb20
2 changed files with 47 additions and 3 deletions

@@ -38,6 +38,11 @@ type AppState = {
   updateTools: (tools: MCPTool[]) => void
   setAbortController: (threadId: string, controller: AbortController) => void
   updateTokenSpeed: (message: ThreadMessage, increment?: number) => void
+  setTokenSpeed: (
+    message: ThreadMessage,
+    speed: number,
+    completionTokens: number
+  ) => void
   resetTokenSpeed: () => void
   clearAppState: () => void
   setOutOfContextDialog: (show: boolean) => void
@@ -96,6 +101,17 @@ export const useAppState = create<AppState>()((set) => ({
       },
     }))
   },
+  setTokenSpeed: (message, speed, completionTokens) => {
+    set((state) => ({
+      tokenSpeed: {
+        ...state.tokenSpeed,
+        lastTimestamp: new Date().getTime(),
+        tokenSpeed: speed,
+        tokenCount: completionTokens,
+        message: message.id,
+      },
+    }))
+  },
   updateTokenSpeed: (message, increment = 1) =>
     set((state) => {
       const currentTimestamp = new Date().getTime() // Get current time in milliseconds
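The hunk above adds `setTokenSpeed` alongside the existing `updateTokenSpeed`: the old action accumulates delta counts and derives a speed locally, while the new one writes an exact, API-reported speed and token count into the store in a single update. A minimal sketch of driving it from outside React via zustand's `getState()` (the helper, the `startedAt` parameter, and both import paths are illustrative, not part of this diff):

```ts
import type { ThreadMessage } from '@janhq/core' // type source assumed
import { useAppState } from '@/hooks/useAppState' // store path assumed

// Hypothetical helper: turn API-reported usage into an exact tokens/sec
// reading and push it through the new store action.
function reportExactSpeed(
  message: ThreadMessage,
  completionTokens: number,
  startedAt: number // ms timestamp captured when the request began
) {
  const elapsedSeconds = Math.max((Date.now() - startedAt) / 1000, 1)
  useAppState
    .getState()
    .setTokenSpeed(message, completionTokens / elapsedSeconds, completionTokens)
}
```

The remaining hunks are in the `useChat` hook, which feeds this action from completion responses.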

@@ -19,7 +19,10 @@ import {
 } from '@/lib/completion'
 import { CompletionMessagesBuilder } from '@/lib/messages'
 import { renderInstructions } from '@/lib/instructionTemplate'
-import { ChatCompletionMessageToolCall } from 'openai/resources'
+import {
+  ChatCompletionMessageToolCall,
+  CompletionUsage,
+} from 'openai/resources'
 import { useServiceHub } from '@/hooks/useServiceHub'
 import { useToolApproval } from '@/hooks/useToolApproval'
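`CompletionUsage` is the usage payload type from the openai SDK; the fields that matter for this change are the token counts (abridged shape, for reference):

```ts
// Abridged from 'openai/resources' — the fields this diff relies on.
interface CompletionUsage {
  completion_tokens: number // tokens the model generated
  prompt_tokens: number // tokens in the input
  total_tokens: number // prompt_tokens + completion_tokens
}
```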
@@ -42,6 +45,7 @@ export const useChat = () => {
     updateStreamingContent,
     updateLoadingModel,
     setAbortController,
+    setTokenSpeed,
   ] = useAppState(
     useShallow((state) => [
       state.updateTokenSpeed,
@@ -49,6 +53,7 @@
       state.updateStreamingContent,
       state.updateLoadingModel,
       state.setAbortController,
+      state.setTokenSpeed,
     ])
   )
   const updatePromptProgress = useAppState(
@@ -333,6 +338,8 @@ export const useChat = () => {
     let accumulatedText = ''
     const currentCall: ChatCompletionMessageToolCall | null = null
     const toolCalls: ChatCompletionMessageToolCall[] = []
+    const timeToFirstToken = Date.now()
+    let tokenUsage: CompletionUsage | undefined = undefined
     try {
       if (isCompletionResponse(completion)) {
         const message = completion.choices[0]?.message
@@ -348,6 +355,9 @@
         if (message?.tool_calls) {
           toolCalls.push(...message.tool_calls)
         }
+        if ('usage' in completion) {
+          tokenUsage = completion.usage
+        }
       } else {
         // High-throughput scheduler: batch UI updates on rAF (requestAnimationFrame)
         let rafScheduled = false
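The "high-throughput scheduler" comment refers to a standard pattern: count incoming stream deltas and flush the UI at most once per animation frame instead of re-rendering on every chunk. A generic sketch of the pattern (not the file's exact code):

```ts
// rAF batching: coalesce many stream deltas into one UI update per frame.
let rafScheduled = false
let pendingDeltaCount = 0

function onDelta(flush: () => void) {
  pendingDeltaCount += 1
  if (!rafScheduled) {
    rafScheduled = true
    requestAnimationFrame(() => {
      flush() // apply the accumulated content to the UI in one pass
      pendingDeltaCount = 0
      rafScheduled = false
    })
  }
}
```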
@@ -384,7 +394,14 @@
             }
           )
           updateStreamingContent(currentContent)
-          if (pendingDeltaCount > 0) {
+          if (tokenUsage) {
+            setTokenSpeed(
+              currentContent,
+              tokenUsage.completion_tokens /
+                Math.max((Date.now() - timeToFirstToken) / 1000, 1),
+              tokenUsage.completion_tokens
+            )
+          } else if (pendingDeltaCount > 0) {
             updateTokenSpeed(currentContent, pendingDeltaCount)
           }
           pendingDeltaCount = 0
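When usage is available, speed becomes completion tokens divided by wall-clock seconds since `timeToFirstToken` (captured just before the request starts), with the denominator clamped to at least one second so a sub-second burst cannot report an inflated rate; without usage, the code falls back to the old delta-count estimate via `updateTokenSpeed`. A worked example of the formula:

```ts
// 256 completion tokens reported, request started 3.2 s ago:
const speed = 256 / Math.max(3200 / 1000, 1) // 256 / 3.2 = 80 tok/s

// Sub-second completion: the clamp keeps the denominator at 1,
// so the reading is capped at the raw token count.
const burst = 64 / Math.max(400 / 1000, 1) // 64 / 1 = 64 tok/s
```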
@@ -413,7 +430,14 @@
           }
         )
         updateStreamingContent(currentContent)
-        if (pendingDeltaCount > 0) {
+        if (tokenUsage) {
+          setTokenSpeed(
+            currentContent,
+            tokenUsage.completion_tokens /
+              Math.max((Date.now() - timeToFirstToken) / 1000, 1),
+            tokenUsage.completion_tokens
+          )
+        } else if (pendingDeltaCount > 0) {
           updateTokenSpeed(currentContent, pendingDeltaCount)
         }
         pendingDeltaCount = 0
@@ -445,6 +469,10 @@
           )
         }
+        if ('usage' in part && part.usage) {
+          tokenUsage = part.usage
+        }
         if (part.choices[0]?.delta?.tool_calls) {
           extractToolCall(part, currentCall, toolCalls)
           // Schedule a flush to reflect tool update
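The `'usage' in part && part.usage` guard matters because, on OpenAI-compatible streaming endpoints, usage normally arrives only on a trailing chunk, and only when the request opts in via `stream_options`. A minimal sketch with the openai SDK (the model name is a placeholder):

```ts
import OpenAI from 'openai'

const client = new OpenAI()

const stream = await client.chat.completions.create({
  model: 'gpt-4o-mini', // placeholder
  messages: [{ role: 'user', content: 'Hello' }],
  stream: true,
  // Ask the server to append a final chunk that carries token usage.
  stream_options: { include_usage: true },
})

for await (const part of stream) {
  if ('usage' in part && part.usage) {
    // Final chunk: choices is empty; usage holds the totals.
    console.log('completion tokens:', part.usage.completion_tokens)
  }
}
```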