chore: re enable reasoning_content in backend (#6228)
* chore: re enable reasoning_content in backend
* chore: handle reasoning_content
* chore: refactor get reasoning content
* chore: update PR review

---------

Co-authored-by: Faisal Amir <urmauur@gmail.com>
This commit is contained in:
parent 43ab6dfc65
commit 906b87022d
@@ -7,6 +7,7 @@ export interface chatCompletionRequestMessage {
  role: 'system' | 'user' | 'assistant' | 'tool'
  content: string | null | Content[] // Content can be a string OR an array of content parts
  reasoning?: string | null // Some models return reasoning in completed responses
  reasoning_content?: string | null // Some models return reasoning in completed responses
  name?: string
  tool_calls?: any[] // Simplified
  tool_call_id?: string
}
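For context, a minimal sketch (values and variable name are hypothetical, not from this diff) of an assistant message carrying the new field next to the legacy one:

// Illustrative only: reasoning_content is the newer field, reasoning the legacy one some models still emit.
const exampleMessage: chatCompletionRequestMessage = {
  role: 'assistant',
  content: 'The answer is 42.',
  reasoning_content: 'Multiply 6 by 7 to get 42.',
  reasoning: 'Multiply 6 by 7 to get 42.',
}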
@@ -274,7 +275,7 @@ export abstract class AIEngine extends BaseExtension {

  /**
   * Check if a tool is supported by the model
   * @param modelId
   */
  abstract isToolSupported(modelId: string): Promise<boolean>
}

@@ -1228,7 +1228,6 @@ export default class llamacpp_extension extends AIEngine {
      modelConfig.model_path,
    ])
    args.push('--jinja')
    args.push('--reasoning-format', 'none')
    args.push('-m', modelPath)
    // For overriding tensor buffer type, useful where
    // massive MOE models can be made faster by keeping attention on the GPU

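For illustration, a minimal sketch (variable name and model path are placeholders) of the argument list the pushes shown above assemble:

// Illustrative only: mirrors the args.push calls in this hunk with a hypothetical model path.
const exampleArgs: string[] = []
exampleArgs.push('--jinja')
exampleArgs.push('--reasoning-format', 'none')
exampleArgs.push('-m', '/models/example.gguf') // placeholder path
// exampleArgs -> ['--jinja', '--reasoning-format', 'none', '-m', '/models/example.gguf']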
web-app/src/utils/__tests__/reasoning.test.ts (new file, 381 lines)
@@ -0,0 +1,381 @@
import { describe, it, expect, beforeEach } from 'vitest'
import {
  ReasoningProcessor,
  extractReasoningFromMessage,
} from '../reasoning'
import { CompletionResponseChunk } from 'token.js'
import { chatCompletionChunk, chatCompletionRequestMessage } from '@janhq/core'

describe('extractReasoningFromMessage', () => {
  it('should extract reasoning from message with reasoning_content property', () => {
    const message = {
      role: 'assistant' as const,
      content: 'Hello',
      reasoning_content: 'This is my reasoning content',
    }

    const result = extractReasoningFromMessage(message)
    expect(result).toBe('This is my reasoning content')
  })

  it('should extract reasoning from message with legacy reasoning property', () => {
    const message = {
      role: 'assistant' as const,
      content: 'Hello',
      reasoning: 'This is my reasoning',
    }

    const result = extractReasoningFromMessage(message)
    expect(result).toBe('This is my reasoning')
  })

  it('should prefer reasoning_content over reasoning property', () => {
    const message = {
      role: 'assistant' as const,
      content: 'Hello',
      reasoning_content: 'New reasoning content',
      reasoning: 'Old reasoning',
    }

    const result = extractReasoningFromMessage(message)
    expect(result).toBe('New reasoning content')
  })

  it('should return null for message without reasoning', () => {
    const message = {
      role: 'assistant' as const,
      content: 'Hello',
    }

    const result = extractReasoningFromMessage(message)
    expect(result).toBeNull()
  })

  it('should return null for null/undefined message', () => {
    expect(extractReasoningFromMessage(null as any)).toBeNull()
    expect(extractReasoningFromMessage(undefined as any)).toBeNull()
  })
})

describe('ReasoningProcessor', () => {
  let processor: ReasoningProcessor

  beforeEach(() => {
    processor = new ReasoningProcessor()
  })

  describe('processReasoningChunk', () => {
    it('should start reasoning with opening think tag using reasoning_content', () => {
      const chunk: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning_content: 'Let me think about this...',
          },
        }],
      }

      const result = processor.processReasoningChunk(chunk)
      expect(result).toBe('<think>Let me think about this...')
      expect(processor.isReasoningInProgress()).toBe(true)
    })

    it('should start reasoning with opening think tag using legacy reasoning', () => {
      const chunk: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning: 'Let me think about this...',
          },
        }],
      }

      const result = processor.processReasoningChunk(chunk)
      expect(result).toBe('<think>Let me think about this...')
      expect(processor.isReasoningInProgress()).toBe(true)
    })

    it('should continue reasoning without opening tag', () => {
      // Start reasoning
      const chunk1: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning_content: 'First part',
          },
        }],
      }
      processor.processReasoningChunk(chunk1)

      // Continue reasoning
      const chunk2: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning_content: ' second part',
          },
        }],
      }

      const result = processor.processReasoningChunk(chunk2)
      expect(result).toBe(' second part')
      expect(processor.isReasoningInProgress()).toBe(true)
    })

    it('should end reasoning when content starts', () => {
      // Start reasoning
      const chunk1: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning_content: 'Thinking...',
          },
        }],
      }
      processor.processReasoningChunk(chunk1)

      // End reasoning with content
      const chunk2: chatCompletionChunk = {
        choices: [{
          delta: {
            content: 'Now I respond',
          },
        }],
      }

      const result = processor.processReasoningChunk(chunk2)
      expect(result).toBe('</think>')
      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should handle empty reasoning chunks', () => {
      const chunk: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning_content: '',
          },
        }],
      }

      const result = processor.processReasoningChunk(chunk)
      expect(result).toBe('')
      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should handle whitespace-only reasoning', () => {
      const chunk: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning_content: ' \n ',
          },
        }],
      }

      const result = processor.processReasoningChunk(chunk)
      expect(result).toBe('')
      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should handle non-string reasoning', () => {
      const chunk: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning_content: null as any,
          },
        }],
      }

      const result = processor.processReasoningChunk(chunk)
      expect(result).toBe('')
      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should handle chunk without choices', () => {
      const chunk: chatCompletionChunk = {
        choices: undefined as any,
      }

      const result = processor.processReasoningChunk(chunk)
      expect(result).toBe('')
      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should handle chunk without delta', () => {
      const chunk: chatCompletionChunk = {
        choices: [{
          delta: undefined as any,
        }],
      }

      const result = processor.processReasoningChunk(chunk)
      expect(result).toBe('')
      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should handle content without active reasoning', () => {
      const chunk: chatCompletionChunk = {
        choices: [{
          delta: {
            content: 'Regular content',
          },
        }],
      }

      const result = processor.processReasoningChunk(chunk)
      expect(result).toBe('')
      expect(processor.isReasoningInProgress()).toBe(false)
    })
  })

  describe('finalize', () => {
    it('should close reasoning if still active', () => {
      // Start reasoning
      const chunk: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning_content: 'Unfinished thinking',
          },
        }],
      }
      processor.processReasoningChunk(chunk)

      const result = processor.finalize()
      expect(result).toBe('</think>')
      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should return empty string if no active reasoning', () => {
      const result = processor.finalize()
      expect(result).toBe('')
      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should handle multiple finalize calls', () => {
      // Start reasoning
      const chunk: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning_content: 'Thinking',
          },
        }],
      }
      processor.processReasoningChunk(chunk)

      // First finalize
      const result1 = processor.finalize()
      expect(result1).toBe('</think>')

      // Second finalize should return empty
      const result2 = processor.finalize()
      expect(result2).toBe('')
    })
  })

  describe('isReasoningInProgress', () => {
    it('should track reasoning state correctly', () => {
      expect(processor.isReasoningInProgress()).toBe(false)

      // Start reasoning
      const chunk1: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning_content: 'Start thinking',
          },
        }],
      }
      processor.processReasoningChunk(chunk1)
      expect(processor.isReasoningInProgress()).toBe(true)

      // End with content
      const chunk2: chatCompletionChunk = {
        choices: [{
          delta: {
            content: 'Response',
          },
        }],
      }
      processor.processReasoningChunk(chunk2)
      expect(processor.isReasoningInProgress()).toBe(false)
    })
  })

  describe('integration scenarios', () => {
    it('should handle complete reasoning flow', () => {
      const chunks: chatCompletionChunk[] = [
        {
          choices: [{
            delta: { reasoning_content: 'Let me think' },
          }],
        },
        {
          choices: [{
            delta: { reasoning_content: ' about this problem' },
          }],
        },
        {
          choices: [{
            delta: { reasoning_content: ' step by step.' },
          }],
        },
        {
          choices: [{
            delta: { content: 'Based on my analysis,' },
          }],
        },
        {
          choices: [{
            delta: { content: ' the answer is 42.' },
          }],
        },
      ]

      const results = chunks.map(chunk => processor.processReasoningChunk(chunk))

      expect(results[0]).toBe('<think>Let me think')
      expect(results[1]).toBe(' about this problem')
      expect(results[2]).toBe(' step by step.')
      expect(results[3]).toBe('</think>')
      expect(results[4]).toBe('')

      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should handle reasoning without content', () => {
      const chunk: chatCompletionChunk = {
        choices: [{
          delta: { reasoning_content: 'Only reasoning, no content' },
        }],
      }

      const result1 = processor.processReasoningChunk(chunk)
      expect(result1).toBe('<think>Only reasoning, no content')

      const result2 = processor.finalize()
      expect(result2).toBe('</think>')
    })

    it('should handle mixed reasoning and content chunks', () => {
      // Reasoning then content then reasoning again (edge case)
      const chunk1: chatCompletionChunk = {
        choices: [{
          delta: { reasoning_content: 'First thought' },
        }],
      }

      const chunk2: chatCompletionChunk = {
        choices: [{
          delta: { content: 'Some content' },
        }],
      }

      const chunk3: chatCompletionChunk = {
        choices: [{
          delta: { reasoning_content: 'Second thought' },
        }],
      }

      const result1 = processor.processReasoningChunk(chunk1)
      const result2 = processor.processReasoningChunk(chunk2)
      const result3 = processor.processReasoningChunk(chunk3)

      expect(result1).toBe('<think>First thought')
      expect(result2).toBe('</think>')
      expect(result3).toBe('<think>Second thought')
    })
  })
})
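The tests above pin down the expected streaming behaviour: open a <think> tag on the first non-empty reasoning token, stream subsequent reasoning tokens through unchanged, and close the tag when regular content arrives or on finalize(). As a reading aid, here is a minimal sketch of a processor that would satisfy them; the class name is made up for this sketch, and the shipped implementation lives in web-app/src/utils/reasoning.ts and may differ in detail.

// Minimal sketch consistent with the tests above; not the shipped implementation.
import { chatCompletionChunk, chatCompletionRequestMessage } from '@janhq/core'

export class ReasoningProcessorSketch {
  private inReasoning = false

  processReasoningChunk(chunk: chatCompletionChunk): string {
    const delta = chunk?.choices?.[0]?.delta as chatCompletionRequestMessage | undefined
    if (!delta) return ''

    const reasoning = delta.reasoning_content ?? delta.reasoning ?? null
    if (typeof reasoning === 'string' && reasoning.trim()) {
      // Emit the opening tag only once, on the first non-empty reasoning token.
      const prefix = this.inReasoning ? '' : '<think>'
      this.inReasoning = true
      return prefix + reasoning
    }

    // Regular content arriving while reasoning is open closes the think block.
    if (this.inReasoning && typeof delta.content === 'string' && delta.content) {
      this.inReasoning = false
      return '</think>'
    }

    return ''
  }

  finalize(): string {
    // Close an unterminated think block; subsequent calls return ''.
    if (!this.inReasoning) return ''
    this.inReasoning = false
    return '</think>'
  }

  isReasoningInProgress(): boolean {
    return this.inReasoning
  }
}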
@@ -5,6 +5,11 @@ import {
  chatCompletionRequestMessage,
} from '@janhq/core'

// Helper function to get reasoning content from an object
function getReasoning(obj: { reasoning_content?: string | null; reasoning?: string | null } | null | undefined): string | null {
  return obj?.reasoning_content ?? obj?.reasoning ?? null
}

// Extract reasoning from a message (for completed responses)
export function extractReasoningFromMessage(
  message: chatCompletionRequestMessage | ChatCompletionMessage
@@ -12,7 +17,7 @@ export function extractReasoningFromMessage(
  if (!message) return null

  const extendedMessage = message as chatCompletionRequestMessage
  return extendedMessage.reasoning || null
  return getReasoning(extendedMessage)
}

// Extract reasoning from a chunk (for streaming responses)
@@ -22,7 +27,7 @@ function extractReasoningFromChunk(
  if (!chunk.choices?.[0]?.delta) return null

  const delta = chunk.choices[0].delta as chatCompletionRequestMessage
  const reasoning = delta.reasoning
  const reasoning = getReasoning(delta)

  // Return null for falsy values, non-strings, or whitespace-only strings
  if (!reasoning || typeof reasoning !== 'string' || !reasoning.trim())

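For illustration, a hedged usage sketch (the message object and its values are hypothetical) of the updated helper; per the tests above, reasoning_content takes precedence over the legacy reasoning field:

// Illustrative only: hypothetical non-streaming response message.
const completed = {
  role: 'assistant' as const,
  content: 'The answer is 42.',
  reasoning_content: 'Compute 6 * 7.',
  reasoning: 'older field, ignored when reasoning_content is present',
}
extractReasoningFromMessage(completed) // -> 'Compute 6 * 7.'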