fix: chat completion usage - token speed (#6675)
This commit is contained in:
parent
0de5f17071
commit
157ecacb20
@ -38,6 +38,11 @@ type AppState = {
|
||||
updateTools: (tools: MCPTool[]) => void
|
||||
setAbortController: (threadId: string, controller: AbortController) => void
|
||||
updateTokenSpeed: (message: ThreadMessage, increment?: number) => void
|
||||
setTokenSpeed: (
|
||||
message: ThreadMessage,
|
||||
speed: number,
|
||||
completionTokens: number
|
||||
) => void
|
||||
resetTokenSpeed: () => void
|
||||
clearAppState: () => void
|
||||
setOutOfContextDialog: (show: boolean) => void
|
||||
@ -96,6 +101,17 @@ export const useAppState = create<AppState>()((set) => ({
|
||||
},
|
||||
}))
|
||||
},
|
||||
setTokenSpeed: (message, speed, completionTokens) => {
|
||||
set((state) => ({
|
||||
tokenSpeed: {
|
||||
...state.tokenSpeed,
|
||||
lastTimestamp: new Date().getTime(),
|
||||
tokenSpeed: speed,
|
||||
tokenCount: completionTokens,
|
||||
message: message.id,
|
||||
},
|
||||
}))
|
||||
},
|
||||
updateTokenSpeed: (message, increment = 1) =>
|
||||
set((state) => {
|
||||
const currentTimestamp = new Date().getTime() // Get current time in milliseconds
|
||||
|
||||
@ -19,7 +19,10 @@ import {
|
||||
} from '@/lib/completion'
|
||||
import { CompletionMessagesBuilder } from '@/lib/messages'
|
||||
import { renderInstructions } from '@/lib/instructionTemplate'
|
||||
import { ChatCompletionMessageToolCall } from 'openai/resources'
|
||||
import {
|
||||
ChatCompletionMessageToolCall,
|
||||
CompletionUsage,
|
||||
} from 'openai/resources'
|
||||
|
||||
import { useServiceHub } from '@/hooks/useServiceHub'
|
||||
import { useToolApproval } from '@/hooks/useToolApproval'
|
||||
@ -42,6 +45,7 @@ export const useChat = () => {
|
||||
updateStreamingContent,
|
||||
updateLoadingModel,
|
||||
setAbortController,
|
||||
setTokenSpeed,
|
||||
] = useAppState(
|
||||
useShallow((state) => [
|
||||
state.updateTokenSpeed,
|
||||
@ -49,6 +53,7 @@ export const useChat = () => {
|
||||
state.updateStreamingContent,
|
||||
state.updateLoadingModel,
|
||||
state.setAbortController,
|
||||
state.setTokenSpeed,
|
||||
])
|
||||
)
|
||||
const updatePromptProgress = useAppState(
|
||||
@ -333,6 +338,8 @@ export const useChat = () => {
|
||||
let accumulatedText = ''
|
||||
const currentCall: ChatCompletionMessageToolCall | null = null
|
||||
const toolCalls: ChatCompletionMessageToolCall[] = []
|
||||
const timeToFirstToken = Date.now()
|
||||
let tokenUsage: CompletionUsage | undefined = undefined
|
||||
try {
|
||||
if (isCompletionResponse(completion)) {
|
||||
const message = completion.choices[0]?.message
|
||||
@ -348,6 +355,9 @@ export const useChat = () => {
|
||||
if (message?.tool_calls) {
|
||||
toolCalls.push(...message.tool_calls)
|
||||
}
|
||||
if ('usage' in completion) {
|
||||
tokenUsage = completion.usage
|
||||
}
|
||||
} else {
|
||||
// High-throughput scheduler: batch UI updates on rAF (requestAnimationFrame)
|
||||
let rafScheduled = false
|
||||
@ -384,7 +394,14 @@ export const useChat = () => {
|
||||
}
|
||||
)
|
||||
updateStreamingContent(currentContent)
|
||||
if (pendingDeltaCount > 0) {
|
||||
if (tokenUsage) {
|
||||
setTokenSpeed(
|
||||
currentContent,
|
||||
tokenUsage.completion_tokens /
|
||||
Math.max((Date.now() - timeToFirstToken) / 1000, 1),
|
||||
tokenUsage.completion_tokens
|
||||
)
|
||||
} else if (pendingDeltaCount > 0) {
|
||||
updateTokenSpeed(currentContent, pendingDeltaCount)
|
||||
}
|
||||
pendingDeltaCount = 0
|
||||
@ -413,7 +430,14 @@ export const useChat = () => {
|
||||
}
|
||||
)
|
||||
updateStreamingContent(currentContent)
|
||||
if (pendingDeltaCount > 0) {
|
||||
if (tokenUsage) {
|
||||
setTokenSpeed(
|
||||
currentContent,
|
||||
tokenUsage.completion_tokens /
|
||||
Math.max((Date.now() - timeToFirstToken) / 1000, 1),
|
||||
tokenUsage.completion_tokens
|
||||
)
|
||||
} else if (pendingDeltaCount > 0) {
|
||||
updateTokenSpeed(currentContent, pendingDeltaCount)
|
||||
}
|
||||
pendingDeltaCount = 0
|
||||
@ -445,6 +469,10 @@ export const useChat = () => {
|
||||
)
|
||||
}
|
||||
|
||||
if ('usage' in part && part.usage) {
|
||||
tokenUsage = part.usage
|
||||
}
|
||||
|
||||
if (part.choices[0]?.delta?.tool_calls) {
|
||||
extractToolCall(part, currentCall, toolCalls)
|
||||
// Schedule a flush to reflect tool update
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user