fix: chat completion usage - token speed (#6675)
This commit is contained in:
parent
0de5f17071
commit
157ecacb20
@ -38,6 +38,11 @@ type AppState = {
|
|||||||
updateTools: (tools: MCPTool[]) => void
|
updateTools: (tools: MCPTool[]) => void
|
||||||
setAbortController: (threadId: string, controller: AbortController) => void
|
setAbortController: (threadId: string, controller: AbortController) => void
|
||||||
updateTokenSpeed: (message: ThreadMessage, increment?: number) => void
|
updateTokenSpeed: (message: ThreadMessage, increment?: number) => void
|
||||||
|
setTokenSpeed: (
|
||||||
|
message: ThreadMessage,
|
||||||
|
speed: number,
|
||||||
|
completionTokens: number
|
||||||
|
) => void
|
||||||
resetTokenSpeed: () => void
|
resetTokenSpeed: () => void
|
||||||
clearAppState: () => void
|
clearAppState: () => void
|
||||||
setOutOfContextDialog: (show: boolean) => void
|
setOutOfContextDialog: (show: boolean) => void
|
||||||
@ -96,6 +101,17 @@ export const useAppState = create<AppState>()((set) => ({
|
|||||||
},
|
},
|
||||||
}))
|
}))
|
||||||
},
|
},
|
||||||
|
setTokenSpeed: (message, speed, completionTokens) => {
|
||||||
|
set((state) => ({
|
||||||
|
tokenSpeed: {
|
||||||
|
...state.tokenSpeed,
|
||||||
|
lastTimestamp: new Date().getTime(),
|
||||||
|
tokenSpeed: speed,
|
||||||
|
tokenCount: completionTokens,
|
||||||
|
message: message.id,
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
},
|
||||||
updateTokenSpeed: (message, increment = 1) =>
|
updateTokenSpeed: (message, increment = 1) =>
|
||||||
set((state) => {
|
set((state) => {
|
||||||
const currentTimestamp = new Date().getTime() // Get current time in milliseconds
|
const currentTimestamp = new Date().getTime() // Get current time in milliseconds
|
||||||
|
|||||||
@ -19,7 +19,10 @@ import {
|
|||||||
} from '@/lib/completion'
|
} from '@/lib/completion'
|
||||||
import { CompletionMessagesBuilder } from '@/lib/messages'
|
import { CompletionMessagesBuilder } from '@/lib/messages'
|
||||||
import { renderInstructions } from '@/lib/instructionTemplate'
|
import { renderInstructions } from '@/lib/instructionTemplate'
|
||||||
import { ChatCompletionMessageToolCall } from 'openai/resources'
|
import {
|
||||||
|
ChatCompletionMessageToolCall,
|
||||||
|
CompletionUsage,
|
||||||
|
} from 'openai/resources'
|
||||||
|
|
||||||
import { useServiceHub } from '@/hooks/useServiceHub'
|
import { useServiceHub } from '@/hooks/useServiceHub'
|
||||||
import { useToolApproval } from '@/hooks/useToolApproval'
|
import { useToolApproval } from '@/hooks/useToolApproval'
|
||||||
@ -42,6 +45,7 @@ export const useChat = () => {
|
|||||||
updateStreamingContent,
|
updateStreamingContent,
|
||||||
updateLoadingModel,
|
updateLoadingModel,
|
||||||
setAbortController,
|
setAbortController,
|
||||||
|
setTokenSpeed,
|
||||||
] = useAppState(
|
] = useAppState(
|
||||||
useShallow((state) => [
|
useShallow((state) => [
|
||||||
state.updateTokenSpeed,
|
state.updateTokenSpeed,
|
||||||
@ -49,6 +53,7 @@ export const useChat = () => {
|
|||||||
state.updateStreamingContent,
|
state.updateStreamingContent,
|
||||||
state.updateLoadingModel,
|
state.updateLoadingModel,
|
||||||
state.setAbortController,
|
state.setAbortController,
|
||||||
|
state.setTokenSpeed,
|
||||||
])
|
])
|
||||||
)
|
)
|
||||||
const updatePromptProgress = useAppState(
|
const updatePromptProgress = useAppState(
|
||||||
@ -333,6 +338,8 @@ export const useChat = () => {
|
|||||||
let accumulatedText = ''
|
let accumulatedText = ''
|
||||||
const currentCall: ChatCompletionMessageToolCall | null = null
|
const currentCall: ChatCompletionMessageToolCall | null = null
|
||||||
const toolCalls: ChatCompletionMessageToolCall[] = []
|
const toolCalls: ChatCompletionMessageToolCall[] = []
|
||||||
|
const timeToFirstToken = Date.now()
|
||||||
|
let tokenUsage: CompletionUsage | undefined = undefined
|
||||||
try {
|
try {
|
||||||
if (isCompletionResponse(completion)) {
|
if (isCompletionResponse(completion)) {
|
||||||
const message = completion.choices[0]?.message
|
const message = completion.choices[0]?.message
|
||||||
@ -348,6 +355,9 @@ export const useChat = () => {
|
|||||||
if (message?.tool_calls) {
|
if (message?.tool_calls) {
|
||||||
toolCalls.push(...message.tool_calls)
|
toolCalls.push(...message.tool_calls)
|
||||||
}
|
}
|
||||||
|
if ('usage' in completion) {
|
||||||
|
tokenUsage = completion.usage
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// High-throughput scheduler: batch UI updates on rAF (requestAnimationFrame)
|
// High-throughput scheduler: batch UI updates on rAF (requestAnimationFrame)
|
||||||
let rafScheduled = false
|
let rafScheduled = false
|
||||||
@ -384,7 +394,14 @@ export const useChat = () => {
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
updateStreamingContent(currentContent)
|
updateStreamingContent(currentContent)
|
||||||
if (pendingDeltaCount > 0) {
|
if (tokenUsage) {
|
||||||
|
setTokenSpeed(
|
||||||
|
currentContent,
|
||||||
|
tokenUsage.completion_tokens /
|
||||||
|
Math.max((Date.now() - timeToFirstToken) / 1000, 1),
|
||||||
|
tokenUsage.completion_tokens
|
||||||
|
)
|
||||||
|
} else if (pendingDeltaCount > 0) {
|
||||||
updateTokenSpeed(currentContent, pendingDeltaCount)
|
updateTokenSpeed(currentContent, pendingDeltaCount)
|
||||||
}
|
}
|
||||||
pendingDeltaCount = 0
|
pendingDeltaCount = 0
|
||||||
@ -413,7 +430,14 @@ export const useChat = () => {
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
updateStreamingContent(currentContent)
|
updateStreamingContent(currentContent)
|
||||||
if (pendingDeltaCount > 0) {
|
if (tokenUsage) {
|
||||||
|
setTokenSpeed(
|
||||||
|
currentContent,
|
||||||
|
tokenUsage.completion_tokens /
|
||||||
|
Math.max((Date.now() - timeToFirstToken) / 1000, 1),
|
||||||
|
tokenUsage.completion_tokens
|
||||||
|
)
|
||||||
|
} else if (pendingDeltaCount > 0) {
|
||||||
updateTokenSpeed(currentContent, pendingDeltaCount)
|
updateTokenSpeed(currentContent, pendingDeltaCount)
|
||||||
}
|
}
|
||||||
pendingDeltaCount = 0
|
pendingDeltaCount = 0
|
||||||
@ -445,6 +469,10 @@ export const useChat = () => {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ('usage' in part && part.usage) {
|
||||||
|
tokenUsage = part.usage
|
||||||
|
}
|
||||||
|
|
||||||
if (part.choices[0]?.delta?.tool_calls) {
|
if (part.choices[0]?.delta?.tool_calls) {
|
||||||
extractToolCall(part, currentCall, toolCalls)
|
extractToolCall(part, currentCall, toolCalls)
|
||||||
// Schedule a flush to reflect tool update
|
// Schedule a flush to reflect tool update
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user