chore: re-enable reasoning_content in backend (#6228)

* chore: re-enable reasoning_content in backend

* chore: handle reasoning_content

* chore: refactor get reasoning content

* chore: address PR review

---------

Co-authored-by: Faisal Amir <urmauur@gmail.com>
Akarshan Biswas 2025-08-20 13:06:21 +05:30 committed by GitHub
parent 43ab6dfc65
commit 906b87022d
4 changed files with 390 additions and 4 deletions


@@ -7,6 +7,7 @@ export interface chatCompletionRequestMessage {
   role: 'system' | 'user' | 'assistant' | 'tool'
   content: string | null | Content[] // Content can be a string OR an array of content parts
   reasoning?: string | null // Some models return reasoning in completed responses
+  reasoning_content?: string | null // Some models return reasoning in completed responses
   name?: string
   tool_calls?: any[] // Simplified
   tool_call_id?: string
 }
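
For illustration, an assistant message shaped like the updated interface could look as follows (values are hypothetical):

const message: chatCompletionRequestMessage = {
  role: 'assistant',
  content: 'The answer is 42.',
  // Newer backends emit reasoning_content; reasoning stays for backward compatibility.
  reasoning_content: 'Compute 6 * 7 = 42.',
}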


@@ -1228,7 +1228,6 @@ export default class llamacpp_extension extends AIEngine {
       modelConfig.model_path,
     ])
     args.push('--jinja')
-    args.push('--reasoning-format', 'none')
     args.push('-m', modelPath)
     // For overriding tensor buffer type, useful where
     // massive MOE models can be made faster by keeping attention on the GPU
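
With '--reasoning-format none' no longer passed, the llama.cpp server falls back to its default reasoning handling, so streamed deltas can carry reasoning in a dedicated reasoning_content field rather than inline in the content. An illustrative chunk as the client might now receive it (hypothetical values):

const chunk: chatCompletionChunk = {
  choices: [{ delta: { reasoning_content: 'Let me think about this...' } }],
}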


@@ -0,0 +1,381 @@
import { describe, it, expect, beforeEach } from 'vitest'
import {
ReasoningProcessor,
extractReasoningFromMessage,
} from '../reasoning'
import { CompletionResponseChunk } from 'token.js'
import { chatCompletionChunk, chatCompletionRequestMessage } from '@janhq/core'
describe('extractReasoningFromMessage', () => {
it('should extract reasoning from message with reasoning_content property', () => {
const message = {
role: 'assistant' as const,
content: 'Hello',
reasoning_content: 'This is my reasoning content',
}
const result = extractReasoningFromMessage(message)
expect(result).toBe('This is my reasoning content')
})
it('should extract reasoning from message with legacy reasoning property', () => {
const message = {
role: 'assistant' as const,
content: 'Hello',
reasoning: 'This is my reasoning',
}
const result = extractReasoningFromMessage(message)
expect(result).toBe('This is my reasoning')
})
it('should prefer reasoning_content over reasoning property', () => {
const message = {
role: 'assistant' as const,
content: 'Hello',
reasoning_content: 'New reasoning content',
reasoning: 'Old reasoning',
}
const result = extractReasoningFromMessage(message)
expect(result).toBe('New reasoning content')
})
it('should return null for message without reasoning', () => {
const message = {
role: 'assistant' as const,
content: 'Hello',
}
const result = extractReasoningFromMessage(message)
expect(result).toBeNull()
})
it('should return null for null/undefined message', () => {
expect(extractReasoningFromMessage(null as any)).toBeNull()
expect(extractReasoningFromMessage(undefined as any)).toBeNull()
})
})
describe('ReasoningProcessor', () => {
let processor: ReasoningProcessor
beforeEach(() => {
processor = new ReasoningProcessor()
})
describe('processReasoningChunk', () => {
it('should start reasoning with opening think tag using reasoning_content', () => {
const chunk: chatCompletionChunk = {
choices: [{
delta: {
reasoning_content: 'Let me think about this...',
},
}],
}
const result = processor.processReasoningChunk(chunk)
expect(result).toBe('<think>Let me think about this...')
expect(processor.isReasoningInProgress()).toBe(true)
})
it('should start reasoning with opening think tag using legacy reasoning', () => {
const chunk: chatCompletionChunk = {
choices: [{
delta: {
reasoning: 'Let me think about this...',
},
}],
}
const result = processor.processReasoningChunk(chunk)
expect(result).toBe('<think>Let me think about this...')
expect(processor.isReasoningInProgress()).toBe(true)
})
it('should continue reasoning without opening tag', () => {
// Start reasoning
const chunk1: chatCompletionChunk = {
choices: [{
delta: {
reasoning_content: 'First part',
},
}],
}
processor.processReasoningChunk(chunk1)
// Continue reasoning
const chunk2: chatCompletionChunk = {
choices: [{
delta: {
reasoning_content: ' second part',
},
}],
}
const result = processor.processReasoningChunk(chunk2)
expect(result).toBe(' second part')
expect(processor.isReasoningInProgress()).toBe(true)
})
it('should end reasoning when content starts', () => {
// Start reasoning
const chunk1: chatCompletionChunk = {
choices: [{
delta: {
reasoning_content: 'Thinking...',
},
}],
}
processor.processReasoningChunk(chunk1)
// End reasoning with content
const chunk2: chatCompletionChunk = {
choices: [{
delta: {
content: 'Now I respond',
},
}],
}
const result = processor.processReasoningChunk(chunk2)
expect(result).toBe('</think>')
expect(processor.isReasoningInProgress()).toBe(false)
})
it('should handle empty reasoning chunks', () => {
const chunk: chatCompletionChunk = {
choices: [{
delta: {
reasoning_content: '',
},
}],
}
const result = processor.processReasoningChunk(chunk)
expect(result).toBe('')
expect(processor.isReasoningInProgress()).toBe(false)
})
it('should handle whitespace-only reasoning', () => {
const chunk: chatCompletionChunk = {
choices: [{
delta: {
reasoning_content: ' \n ',
},
}],
}
const result = processor.processReasoningChunk(chunk)
expect(result).toBe('')
expect(processor.isReasoningInProgress()).toBe(false)
})
it('should handle non-string reasoning', () => {
const chunk: chatCompletionChunk = {
choices: [{
delta: {
reasoning_content: null as any,
},
}],
}
const result = processor.processReasoningChunk(chunk)
expect(result).toBe('')
expect(processor.isReasoningInProgress()).toBe(false)
})
it('should handle chunk without choices', () => {
const chunk: chatCompletionChunk = {
choices: undefined as any,
}
const result = processor.processReasoningChunk(chunk)
expect(result).toBe('')
expect(processor.isReasoningInProgress()).toBe(false)
})
it('should handle chunk without delta', () => {
const chunk: chatCompletionChunk = {
choices: [{
delta: undefined as any,
}],
}
const result = processor.processReasoningChunk(chunk)
expect(result).toBe('')
expect(processor.isReasoningInProgress()).toBe(false)
})
it('should handle content without active reasoning', () => {
const chunk: chatCompletionChunk = {
choices: [{
delta: {
content: 'Regular content',
},
}],
}
const result = processor.processReasoningChunk(chunk)
expect(result).toBe('')
expect(processor.isReasoningInProgress()).toBe(false)
})
})
describe('finalize', () => {
it('should close reasoning if still active', () => {
// Start reasoning
const chunk: chatCompletionChunk = {
choices: [{
delta: {
reasoning_content: 'Unfinished thinking',
},
}],
}
processor.processReasoningChunk(chunk)
const result = processor.finalize()
expect(result).toBe('</think>')
expect(processor.isReasoningInProgress()).toBe(false)
})
it('should return empty string if no active reasoning', () => {
const result = processor.finalize()
expect(result).toBe('')
expect(processor.isReasoningInProgress()).toBe(false)
})
it('should handle multiple finalize calls', () => {
// Start reasoning
const chunk: chatCompletionChunk = {
choices: [{
delta: {
reasoning_content: 'Thinking',
},
}],
}
processor.processReasoningChunk(chunk)
// First finalize
const result1 = processor.finalize()
expect(result1).toBe('</think>')
// Second finalize should return empty
const result2 = processor.finalize()
expect(result2).toBe('')
})
})
describe('isReasoningInProgress', () => {
it('should track reasoning state correctly', () => {
expect(processor.isReasoningInProgress()).toBe(false)
// Start reasoning
const chunk1: chatCompletionChunk = {
choices: [{
delta: {
reasoning_content: 'Start thinking',
},
}],
}
processor.processReasoningChunk(chunk1)
expect(processor.isReasoningInProgress()).toBe(true)
// End with content
const chunk2: chatCompletionChunk = {
choices: [{
delta: {
content: 'Response',
},
}],
}
processor.processReasoningChunk(chunk2)
expect(processor.isReasoningInProgress()).toBe(false)
})
})
describe('integration scenarios', () => {
it('should handle complete reasoning flow', () => {
const chunks: chatCompletionChunk[] = [
{
choices: [{
delta: { reasoning_content: 'Let me think' },
}],
},
{
choices: [{
delta: { reasoning_content: ' about this problem' },
}],
},
{
choices: [{
delta: { reasoning_content: ' step by step.' },
}],
},
{
choices: [{
delta: { content: 'Based on my analysis,' },
}],
},
{
choices: [{
delta: { content: ' the answer is 42.' },
}],
},
]
const results = chunks.map(chunk => processor.processReasoningChunk(chunk))
expect(results[0]).toBe('<think>Let me think')
expect(results[1]).toBe(' about this problem')
expect(results[2]).toBe(' step by step.')
expect(results[3]).toBe('</think>')
expect(results[4]).toBe('')
expect(processor.isReasoningInProgress()).toBe(false)
})
it('should handle reasoning without content', () => {
const chunk: chatCompletionChunk = {
choices: [{
delta: { reasoning_content: 'Only reasoning, no content' },
}],
}
const result1 = processor.processReasoningChunk(chunk)
expect(result1).toBe('<think>Only reasoning, no content')
const result2 = processor.finalize()
expect(result2).toBe('</think>')
})
it('should handle mixed reasoning and content chunks', () => {
// Reasoning then content then reasoning again (edge case)
const chunk1: chatCompletionChunk = {
choices: [{
delta: { reasoning_content: 'First thought' },
}],
}
const chunk2: chatCompletionChunk = {
choices: [{
delta: { content: 'Some content' },
}],
}
const chunk3: chatCompletionChunk = {
choices: [{
delta: { reasoning_content: 'Second thought' },
}],
}
const result1 = processor.processReasoningChunk(chunk1)
const result2 = processor.processReasoningChunk(chunk2)
const result3 = processor.processReasoningChunk(chunk3)
expect(result1).toBe('<think>First thought')
expect(result2).toBe('</think>')
expect(result3).toBe('<think>Second thought')
})
})
})
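
The tests above pin down the streaming behavior precisely. For orientation, here is a minimal ReasoningProcessor sketch that satisfies these expectations, assuming the chatCompletionChunk type from @janhq/core; the shipped implementation in reasoning.ts may differ in detail:

import { chatCompletionChunk } from '@janhq/core'

export class ReasoningProcessor {
  private inReasoning = false

  processReasoningChunk(chunk: chatCompletionChunk): string {
    const delta = chunk.choices?.[0]?.delta as
      | { reasoning_content?: string | null; reasoning?: string | null; content?: string | null }
      | undefined
    if (!delta) return ''

    // Prefer the newer reasoning_content field, fall back to legacy reasoning.
    const reasoning = delta.reasoning_content ?? delta.reasoning

    if (typeof reasoning === 'string' && reasoning.trim()) {
      // The first non-empty reasoning piece opens the <think> block.
      if (!this.inReasoning) {
        this.inReasoning = true
        return '<think>' + reasoning
      }
      return reasoning
    }

    // Regular content arriving while reasoning is open closes the block.
    if (this.inReasoning && delta.content) {
      this.inReasoning = false
      return '</think>'
    }
    return ''
  }

  // Close an unterminated <think> block at the end of the stream.
  finalize(): string {
    if (!this.inReasoning) return ''
    this.inReasoning = false
    return '</think>'
  }

  isReasoningInProgress(): boolean {
    return this.inReasoning
  }
}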


@@ -5,6 +5,11 @@ import {
   chatCompletionRequestMessage,
 } from '@janhq/core'
 
+// Helper function to get reasoning content from an object
+function getReasoning(obj: { reasoning_content?: string | null; reasoning?: string | null } | null | undefined): string | null {
+  return obj?.reasoning_content ?? obj?.reasoning ?? null
+}
+
 // Extract reasoning from a message (for completed responses)
 export function extractReasoningFromMessage(
   message: chatCompletionRequestMessage | ChatCompletionMessage
@@ -12,7 +17,7 @@ export function extractReasoningFromMessage(
   if (!message) return null
   const extendedMessage = message as chatCompletionRequestMessage
-  return extendedMessage.reasoning || null
+  return getReasoning(extendedMessage)
 }
 
 // Extract reasoning from a chunk (for streaming responses)
@@ -22,7 +27,7 @@ function extractReasoningFromChunk(
   if (!chunk.choices?.[0]?.delta) return null
   const delta = chunk.choices[0].delta as chatCompletionRequestMessage
-  const reasoning = delta.reasoning
+  const reasoning = getReasoning(delta)
   // Return null for falsy values, non-strings, or whitespace-only strings
   if (!reasoning || typeof reasoning !== 'string' || !reasoning.trim())
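
For reference, the helper's precedence can be exercised directly (illustrative calls, not part of the commit):

getReasoning({ reasoning_content: 'new', reasoning: 'old' }) // 'new' (reasoning_content wins)
getReasoning({ reasoning: 'old' }) // 'old' (legacy fallback)
getReasoning({}) // null
getReasoning(null) // null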