Fix Issue #6199
Fix Issue: Jan UI Bottlenecks Token Rendering Speed to ~300 TPS Despite Faster Cerebras API Output
This commit is contained in:
parent 56fa4f9677
commit 4ba56f1377
@@ -21,7 +21,7 @@ type AppState = {
|
|||||||
updateLoadingModel: (loading: boolean) => void
|
updateLoadingModel: (loading: boolean) => void
|
||||||
updateTools: (tools: MCPTool[]) => void
|
updateTools: (tools: MCPTool[]) => void
|
||||||
setAbortController: (threadId: string, controller: AbortController) => void
|
setAbortController: (threadId: string, controller: AbortController) => void
|
||||||
updateTokenSpeed: (message: ThreadMessage) => void
|
updateTokenSpeed: (message: ThreadMessage, increment?: number) => void
|
||||||
resetTokenSpeed: () => void
|
resetTokenSpeed: () => void
|
||||||
setOutOfContextDialog: (show: boolean) => void
|
setOutOfContextDialog: (show: boolean) => void
|
||||||
}
|
}
|
||||||
@@ -74,7 +74,7 @@ export const useAppState = create<AppState>()((set) => ({
|
|||||||
},
|
},
|
||||||
}))
|
}))
|
||||||
},
|
},
|
||||||
updateTokenSpeed: (message) =>
|
updateTokenSpeed: (message, increment = 1) =>
|
||||||
set((state) => {
|
set((state) => {
|
||||||
const currentTimestamp = new Date().getTime() // Get current time in milliseconds
|
const currentTimestamp = new Date().getTime() // Get current time in milliseconds
|
||||||
if (!state.tokenSpeed) {
|
if (!state.tokenSpeed) {
|
||||||
@@ -83,7 +83,7 @@ export const useAppState = create<AppState>()((set) => ({
|
|||||||
tokenSpeed: {
|
tokenSpeed: {
|
||||||
lastTimestamp: currentTimestamp,
|
lastTimestamp: currentTimestamp,
|
||||||
tokenSpeed: 0,
|
tokenSpeed: 0,
|
||||||
tokenCount: 1,
|
tokenCount: increment,
|
||||||
message: message.id,
|
message: message.id,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@@ -91,7 +91,7 @@ export const useAppState = create<AppState>()((set) => ({
|
|||||||
|
|
||||||
const timeDiffInSeconds =
|
const timeDiffInSeconds =
|
||||||
(currentTimestamp - state.tokenSpeed.lastTimestamp) / 1000 // Time difference in seconds
|
(currentTimestamp - state.tokenSpeed.lastTimestamp) / 1000 // Time difference in seconds
|
||||||
const totalTokenCount = state.tokenSpeed.tokenCount + 1
|
const totalTokenCount = state.tokenSpeed.tokenCount + increment
|
||||||
const averageTokenSpeed =
|
const averageTokenSpeed =
|
||||||
totalTokenCount / (timeDiffInSeconds > 0 ? timeDiffInSeconds : 1) // Calculate average token speed
|
totalTokenCount / (timeDiffInSeconds > 0 ? timeDiffInSeconds : 1) // Calculate average token speed
|
||||||
return {
|
return {
|
||||||
|
|||||||
@@ -311,6 +311,66 @@ export const useChat = () => {
|
|||||||
toolCalls.push(...completion.choices[0].message.tool_calls)
|
toolCalls.push(...completion.choices[0].message.tool_calls)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
// High-throughput scheduler: batch UI updates on rAF (requestAnimationFrame)
|
||||||
|
let rafScheduled = false
|
||||||
|
let rafHandle: number | undefined
|
||||||
|
let pendingDeltaCount = 0
|
||||||
|
const scheduleFlush = () => {
|
||||||
|
if (rafScheduled) return
|
||||||
|
rafScheduled = true
|
||||||
|
const doSchedule = (cb: () => void) => {
|
||||||
|
if (typeof requestAnimationFrame !== 'undefined') {
|
||||||
|
rafHandle = requestAnimationFrame(() => cb())
|
||||||
|
} else {
|
||||||
|
// Fallback for non-browser test environments
|
||||||
|
const t = setTimeout(() => cb(), 0) as unknown as number
|
||||||
|
rafHandle = t
|
||||||
|
}
|
||||||
|
}
|
||||||
|
doSchedule(() => {
|
||||||
|
const currentContent = newAssistantThreadContent(
|
||||||
|
activeThread.id,
|
||||||
|
accumulatedText,
|
||||||
|
{
|
||||||
|
tool_calls: toolCalls.map((e) => ({
|
||||||
|
...e,
|
||||||
|
state: 'pending',
|
||||||
|
})),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
updateStreamingContent(currentContent)
|
||||||
|
if (pendingDeltaCount > 0) {
|
||||||
|
updateTokenSpeed(currentContent, pendingDeltaCount)
|
||||||
|
}
|
||||||
|
pendingDeltaCount = 0
|
||||||
|
rafScheduled = false
|
||||||
|
})
|
||||||
|
}
|
||||||
|
const flushIfPending = () => {
|
||||||
|
if (!rafScheduled) return
|
||||||
|
if (typeof cancelAnimationFrame !== 'undefined' && rafHandle !== undefined) {
|
||||||
|
cancelAnimationFrame(rafHandle)
|
||||||
|
} else if (rafHandle !== undefined) {
|
||||||
|
clearTimeout(rafHandle)
|
||||||
|
}
|
||||||
|
// Do an immediate flush
|
||||||
|
const currentContent = newAssistantThreadContent(
|
||||||
|
activeThread.id,
|
||||||
|
accumulatedText,
|
||||||
|
{
|
||||||
|
tool_calls: toolCalls.map((e) => ({
|
||||||
|
...e,
|
||||||
|
state: 'pending',
|
||||||
|
})),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
updateStreamingContent(currentContent)
|
||||||
|
if (pendingDeltaCount > 0) {
|
||||||
|
updateTokenSpeed(currentContent, pendingDeltaCount)
|
||||||
|
}
|
||||||
|
pendingDeltaCount = 0
|
||||||
|
rafScheduled = false
|
||||||
|
}
|
||||||
for await (const part of completion) {
|
for await (const part of completion) {
|
||||||
// Error message
|
// Error message
|
||||||
if (!part.choices) {
|
if (!part.choices) {
|
||||||
@@ -323,39 +383,19 @@ export const useChat = () => {
|
|||||||
const delta = part.choices[0]?.delta?.content || ''
|
const delta = part.choices[0]?.delta?.content || ''
|
||||||
|
|
||||||
if (part.choices[0]?.delta?.tool_calls) {
|
if (part.choices[0]?.delta?.tool_calls) {
|
||||||
const calls = extractToolCall(part, currentCall, toolCalls)
|
extractToolCall(part, currentCall, toolCalls)
|
||||||
const currentContent = newAssistantThreadContent(
|
// Schedule a flush to reflect tool update
|
||||||
activeThread.id,
|
scheduleFlush()
|
||||||
accumulatedText,
|
|
||||||
{
|
|
||||||
tool_calls: calls.map((e) => ({
|
|
||||||
...e,
|
|
||||||
state: 'pending',
|
|
||||||
})),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
updateStreamingContent(currentContent)
|
|
||||||
await new Promise((resolve) => setTimeout(resolve, 0))
|
|
||||||
}
|
}
|
||||||
if (delta) {
|
if (delta) {
|
||||||
accumulatedText += delta
|
accumulatedText += delta
|
||||||
// Create a new object each time to avoid reference issues
|
pendingDeltaCount += 1
|
||||||
// Use a timeout to prevent React from batching updates too quickly
|
// Batch UI update on next animation frame
|
||||||
const currentContent = newAssistantThreadContent(
|
scheduleFlush()
|
||||||
activeThread.id,
|
|
||||||
accumulatedText,
|
|
||||||
{
|
|
||||||
tool_calls: toolCalls.map((e) => ({
|
|
||||||
...e,
|
|
||||||
state: 'pending',
|
|
||||||
})),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
updateStreamingContent(currentContent)
|
|
||||||
updateTokenSpeed(currentContent)
|
|
||||||
await new Promise((resolve) => setTimeout(resolve, 0))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Ensure any pending buffered content is rendered at the end
|
||||||
|
flushIfPending()
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
const errorMessage =
|
const errorMessage =
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user