diff --git a/core/src/browser/extensions/engines/AIEngine.ts b/core/src/browser/extensions/engines/AIEngine.ts index a23e8c45e..8853e3334 100644 --- a/core/src/browser/extensions/engines/AIEngine.ts +++ b/core/src/browser/extensions/engines/AIEngine.ts @@ -6,6 +6,7 @@ import { EngineManager } from './EngineManager' export interface chatCompletionRequestMessage { role: 'system' | 'user' | 'assistant' | 'tool' content: string | null | Content[] // Content can be a string OR an array of content parts + reasoning?: string | null // Some models return reasoning in completed responses name?: string tool_calls?: any[] // Simplified tool_call_id?: string } diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts index 390ebba9e..d77d507c5 100644 --- a/web-app/src/hooks/useChat.ts +++ b/web-app/src/hooks/useChat.ts @@ -29,6 +29,10 @@ import { updateSettings } from '@/services/providers' import { useContextSizeApproval } from './useModelContextApproval' import { useModelLoad } from './useModelLoad' import { useGeneralSetting } from './useGeneralSetting' +import { + ReasoningProcessor, + extractReasoningFromMessage, +} from '@/utils/reasoning' export const useChat = () => { const { prompt, setPrompt } = usePrompt() @@ -285,16 +289,25 @@ export const useChat = () => { const toolCalls: ChatCompletionMessageToolCall[] = [] try { if (isCompletionResponse(completion)) { - accumulatedText = - (completion.choices[0]?.message?.content as string) || '' - if (completion.choices[0]?.message?.tool_calls) { - toolCalls.push(...completion.choices[0].message.tool_calls) + const message = completion.choices[0]?.message + accumulatedText = (message?.content as string) || '' + + // Handle reasoning field if there is one + const reasoning = extractReasoningFromMessage(message) + if (reasoning) { + accumulatedText = + `${reasoning}` + accumulatedText + } + + if (message?.tool_calls) { + toolCalls.push(...message.tool_calls) } } else { // High-throughput scheduler: batch UI 
updates on rAF (requestAnimationFrame) let rafScheduled = false let rafHandle: number | undefined let pendingDeltaCount = 0 + const reasoningProcessor = new ReasoningProcessor() const scheduleFlush = () => { if (rafScheduled) return rafScheduled = true @@ -328,7 +341,10 @@ export const useChat = () => { } const flushIfPending = () => { if (!rafScheduled) return - if (typeof cancelAnimationFrame !== 'undefined' && rafHandle !== undefined) { + if ( + typeof cancelAnimationFrame !== 'undefined' && + rafHandle !== undefined + ) { cancelAnimationFrame(rafHandle) } else if (rafHandle !== undefined) { clearTimeout(rafHandle) @@ -360,20 +376,30 @@ export const useChat = () => { : (JSON.stringify(part) ?? '') ) } - const delta = part.choices[0]?.delta?.content || '' if (part.choices[0]?.delta?.tool_calls) { extractToolCall(part, currentCall, toolCalls) // Schedule a flush to reflect tool update scheduleFlush() } - if (delta) { - accumulatedText += delta + const deltaReasoning = + reasoningProcessor.processReasoningChunk(part) + if (deltaReasoning) { + accumulatedText += deltaReasoning + pendingDeltaCount += 1 + // Schedule flush for reasoning updates + scheduleFlush() + } + const deltaContent = part.choices[0]?.delta?.content || '' + if (deltaContent) { + accumulatedText += deltaContent pendingDeltaCount += 1 // Batch UI update on next animation frame scheduleFlush() } } + // Finalize reasoning (close any open think tags) + accumulatedText += reasoningProcessor.finalize() // Ensure any pending buffered content is rendered at the end flushIfPending() } diff --git a/web-app/src/utils/reasoning.ts b/web-app/src/utils/reasoning.ts new file mode 100644 index 000000000..f102b8a76 --- /dev/null +++ b/web-app/src/utils/reasoning.ts @@ -0,0 +1,74 @@ +import { CompletionResponseChunk } from 'token.js' +import { + chatCompletionChunk, + ChatCompletionMessage, + chatCompletionRequestMessage, +} from '@janhq/core' + +// Extract reasoning from a message (for completed responses) +export 
function extractReasoningFromMessage(
  message:
    | chatCompletionRequestMessage
    | ChatCompletionMessage
    | null
    | undefined
): string | null {
  // Extract reasoning from a message (for completed, non-streamed responses).
  //
  // Returns the message's `reasoning` string, or null when the message is
  // missing or its `reasoning` is absent, not a string, or whitespace-only.
  // Callers can therefore wrap any non-null result without producing an
  // empty reasoning block.
  if (!message) return null

  const { reasoning } = message as chatCompletionRequestMessage
  if (typeof reasoning !== 'string' || reasoning.trim() === '') return null

  return reasoning
}

/**
 * Extract the raw reasoning delta from a streaming chunk.
 *
 * Only the first choice is inspected. Whitespace-only strings are returned
 * as-is: whether they are meaningful depends on whether a reasoning block is
 * already open, which only the ReasoningProcessor knows.
 *
 * @returns the `delta.reasoning` string when it is a non-empty string,
 *          otherwise null.
 */
function extractReasoningFromChunk(
  chunk: CompletionResponseChunk | chatCompletionChunk
): string | null {
  const delta = chunk.choices?.[0]?.delta as
    | chatCompletionRequestMessage
    | null
    | undefined
  if (!delta) return null

  const { reasoning } = delta
  if (typeof reasoning !== 'string' || reasoning.length === 0) return null

  return reasoning
}

// Tag name used to delimit reasoning inside the accumulated message text.
const REASONING_TAG = 'think'
const REASONING_OPEN = `<${REASONING_TAG}>`
const REASONING_CLOSE = `</${REASONING_TAG}>`

/**
 * Tracks reasoning state across streamed chunks and returns the text to
 * append for each chunk, wrapped in the opening/closing reasoning tags.
 *
 * Intended use: call `processReasoningChunk` for every chunk and append its
 * result BEFORE appending the chunk's own `delta.content`, then append the
 * result of `finalize()` once the stream has ended.
 */
export class ReasoningProcessor {
  private isReasoningActive = false

  /**
   * @returns the reasoning-related text contributed by this chunk:
   *   - opening tag + reasoning for the first reasoning chunk,
   *   - the reasoning text alone for following reasoning chunks,
   *   - the closing tag once a chunk carries regular content while a
   *     reasoning block is open,
   *   - '' when the chunk has nothing reasoning-related.
   */
  processReasoningChunk(
    chunk: CompletionResponseChunk | chatCompletionChunk
  ): string {
    const reasoning = extractReasoningFromChunk(chunk)
    const hasContent = Boolean(chunk.choices?.[0]?.delta?.content)

    let output = ''

    // Whitespace-only reasoning never opens a block, but inside an open block
    // it is kept so spacing/newlines between reasoning tokens are preserved.
    const hasReasoning =
      reasoning !== null &&
      (this.isReasoningActive || reasoning.trim() !== '')

    if (hasReasoning) {
      if (!this.isReasoningActive) {
        this.isReasoningActive = true
        output += REASONING_OPEN
      }
      output += reasoning
    }

    // Regular content ends the reasoning block. This also covers a chunk
    // carrying both reasoning and content: the block is closed here so the
    // content the caller appends next lands outside of it.
    if (this.isReasoningActive && hasContent) {
      this.isReasoningActive = false
      output += REASONING_CLOSE
    }

    return output
  }

  /**
   * Close a reasoning block left open when the stream ended.
   *
   * @returns the closing tag if a block was open, otherwise ''. Calling it
   *          again returns '' because the state has been reset.
   */
  finalize(): string {
    if (!this.isReasoningActive) return ''

    this.isReasoningActive = false
    return REASONING_CLOSE
  }

  /** Whether a reasoning block is currently open. */
  isReasoningInProgress(): boolean {
    return this.isReasoningActive
  }
}