chore: re-enable reasoning_content in backend (#6228)
* chore: re-enable reasoning_content in backend * chore: handle reasoning_content * chore: refactor get reasoning content * chore: update PR review --------- Co-authored-by: Faisal Amir <urmauur@gmail.com>
This commit is contained in:
parent
43ab6dfc65
commit
906b87022d
@ -7,6 +7,7 @@ export interface chatCompletionRequestMessage {
|
|||||||
role: 'system' | 'user' | 'assistant' | 'tool'
|
role: 'system' | 'user' | 'assistant' | 'tool'
|
||||||
content: string | null | Content[] // Content can be a string OR an array of content parts
|
content: string | null | Content[] // Content can be a string OR an array of content parts
|
||||||
reasoning?: string | null // Some models return reasoning in completed responses
|
reasoning?: string | null // Some models return reasoning in completed responses
|
||||||
|
reasoning_content?: string | null // Some models return reasoning in completed responses
|
||||||
name?: string
|
name?: string
|
||||||
tool_calls?: any[] // Simplified tool_call_id?: string
|
tool_calls?: any[] // Simplified tool_call_id?: string
|
||||||
}
|
}
|
||||||
@ -274,7 +275,7 @@ export abstract class AIEngine extends BaseExtension {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if a tool is supported by the model
|
* Check if a tool is supported by the model
|
||||||
* @param modelId
|
* @param modelId
|
||||||
*/
|
*/
|
||||||
abstract isToolSupported(modelId: string): Promise<boolean>
|
abstract isToolSupported(modelId: string): Promise<boolean>
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1228,7 +1228,6 @@ export default class llamacpp_extension extends AIEngine {
|
|||||||
modelConfig.model_path,
|
modelConfig.model_path,
|
||||||
])
|
])
|
||||||
args.push('--jinja')
|
args.push('--jinja')
|
||||||
args.push('--reasoning-format', 'none')
|
|
||||||
args.push('-m', modelPath)
|
args.push('-m', modelPath)
|
||||||
// For overriding tensor buffer type, useful where
|
// For overriding tensor buffer type, useful where
|
||||||
// massive MOE models can be made faster by keeping attention on the GPU
|
// massive MOE models can be made faster by keeping attention on the GPU
|
||||||
|
|||||||
381
web-app/src/utils/__tests__/reasoning.test.ts
Normal file
381
web-app/src/utils/__tests__/reasoning.test.ts
Normal file
@ -0,0 +1,381 @@
|
|||||||
|
import { describe, it, expect, beforeEach } from 'vitest'
|
||||||
|
import {
|
||||||
|
ReasoningProcessor,
|
||||||
|
extractReasoningFromMessage,
|
||||||
|
} from '../reasoning'
|
||||||
|
import { CompletionResponseChunk } from 'token.js'
|
||||||
|
import { chatCompletionChunk, chatCompletionRequestMessage } from '@janhq/core'
|
||||||
|
|
||||||
|
describe('extractReasoningFromMessage', () => {
  // Minimal assistant message; each case spreads in only the reasoning fields it exercises.
  const baseMessage = { role: 'assistant' as const, content: 'Hello' }

  it('should extract reasoning from message with reasoning_content property', () => {
    const extracted = extractReasoningFromMessage({
      ...baseMessage,
      reasoning_content: 'This is my reasoning content',
    })

    expect(extracted).toBe('This is my reasoning content')
  })

  it('should extract reasoning from message with legacy reasoning property', () => {
    const extracted = extractReasoningFromMessage({
      ...baseMessage,
      reasoning: 'This is my reasoning',
    })

    expect(extracted).toBe('This is my reasoning')
  })

  it('should prefer reasoning_content over reasoning property', () => {
    // Both fields are present: the reasoning_content value is the one expected back.
    const extracted = extractReasoningFromMessage({
      ...baseMessage,
      reasoning_content: 'New reasoning content',
      reasoning: 'Old reasoning',
    })

    expect(extracted).toBe('New reasoning content')
  })

  it('should return null for message without reasoning', () => {
    expect(extractReasoningFromMessage({ ...baseMessage })).toBeNull()
  })

  it('should return null for null/undefined message', () => {
    for (const missing of [null, undefined]) {
      expect(extractReasoningFromMessage(missing as any)).toBeNull()
    }
  })
})
|
||||||
|
|
||||||
|
describe('ReasoningProcessor', () => {
  let processor: ReasoningProcessor

  // Fixture builders: the tests only populate `choices[0].delta`, so the
  // chunks are deliberately partial and asserted to the chunk type.
  const chunkWith = (delta: unknown) =>
    ({ choices: [{ delta }] }) as chatCompletionChunk
  const reasoningChunk = (text: string) =>
    chunkWith({ reasoning_content: text })
  const contentChunk = (text: string) => chunkWith({ content: text })

  // Asserts that a chunk produced no output and left the processor idle.
  const expectIgnored = (chunk: chatCompletionChunk) => {
    expect(processor.processReasoningChunk(chunk)).toBe('')
    expect(processor.isReasoningInProgress()).toBe(false)
  }

  beforeEach(() => {
    processor = new ReasoningProcessor()
  })

  describe('processReasoningChunk', () => {
    it('should start reasoning with opening think tag using reasoning_content', () => {
      const output = processor.processReasoningChunk(
        reasoningChunk('Let me think about this...')
      )

      expect(output).toBe('<think>Let me think about this...')
      expect(processor.isReasoningInProgress()).toBe(true)
    })

    it('should start reasoning with opening think tag using legacy reasoning', () => {
      const output = processor.processReasoningChunk(
        chunkWith({ reasoning: 'Let me think about this...' })
      )

      expect(output).toBe('<think>Let me think about this...')
      expect(processor.isReasoningInProgress()).toBe(true)
    })

    it('should continue reasoning without opening tag', () => {
      // The first chunk opens the reasoning section.
      processor.processReasoningChunk(reasoningChunk('First part'))

      // The follow-up chunk is expected back verbatim.
      const output = processor.processReasoningChunk(
        reasoningChunk(' second part')
      )

      expect(output).toBe(' second part')
      expect(processor.isReasoningInProgress()).toBe(true)
    })

    it('should end reasoning when content starts', () => {
      processor.processReasoningChunk(reasoningChunk('Thinking...'))

      // A content delta arriving mid-reasoning is expected to close the section.
      const output = processor.processReasoningChunk(
        contentChunk('Now I respond')
      )

      expect(output).toBe('</think>')
      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should handle empty reasoning chunks', () => {
      expectIgnored(reasoningChunk(''))
    })

    it('should handle whitespace-only reasoning', () => {
      expectIgnored(reasoningChunk(' \n '))
    })

    it('should handle non-string reasoning', () => {
      expectIgnored(chunkWith({ reasoning_content: null as any }))
    })

    it('should handle chunk without choices', () => {
      const withoutChoices: chatCompletionChunk = {
        choices: undefined as any,
      }

      expectIgnored(withoutChoices)
    })

    it('should handle chunk without delta', () => {
      expectIgnored(chunkWith(undefined as any))
    })

    it('should handle content without active reasoning', () => {
      expectIgnored(contentChunk('Regular content'))
    })
  })

  describe('finalize', () => {
    it('should close reasoning if still active', () => {
      // Leave a reasoning section open before finalizing.
      processor.processReasoningChunk(reasoningChunk('Unfinished thinking'))

      expect(processor.finalize()).toBe('</think>')
      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should return empty string if no active reasoning', () => {
      expect(processor.finalize()).toBe('')
      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should handle multiple finalize calls', () => {
      processor.processReasoningChunk(reasoningChunk('Thinking'))

      // The first call closes the section; the second has nothing left to close.
      const firstCall = processor.finalize()
      expect(firstCall).toBe('</think>')

      const secondCall = processor.finalize()
      expect(secondCall).toBe('')
    })
  })

  describe('isReasoningInProgress', () => {
    it('should track reasoning state correctly', () => {
      expect(processor.isReasoningInProgress()).toBe(false)

      // Reasoning delta switches the flag on.
      processor.processReasoningChunk(reasoningChunk('Start thinking'))
      expect(processor.isReasoningInProgress()).toBe(true)

      // Content delta switches it back off.
      processor.processReasoningChunk(contentChunk('Response'))
      expect(processor.isReasoningInProgress()).toBe(false)
    })
  })

  describe('integration scenarios', () => {
    it('should handle complete reasoning flow', () => {
      const stream: chatCompletionChunk[] = [
        reasoningChunk('Let me think'),
        reasoningChunk(' about this problem'),
        reasoningChunk(' step by step.'),
        contentChunk('Based on my analysis,'),
        contentChunk(' the answer is 42.'),
      ]

      const outputs: string[] = []
      for (const chunk of stream) {
        outputs.push(processor.processReasoningChunk(chunk))
      }

      expect(outputs).toEqual([
        '<think>Let me think',
        ' about this problem',
        ' step by step.',
        '</think>',
        '',
      ])
      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should handle reasoning without content', () => {
      const opened = processor.processReasoningChunk(
        reasoningChunk('Only reasoning, no content')
      )
      expect(opened).toBe('<think>Only reasoning, no content')

      const closed = processor.finalize()
      expect(closed).toBe('</think>')
    })

    it('should handle mixed reasoning and content chunks', () => {
      // Reasoning then content then reasoning again (edge case)
      const [first, second, third] = [
        reasoningChunk('First thought'),
        contentChunk('Some content'),
        reasoningChunk('Second thought'),
      ].map((chunk) => processor.processReasoningChunk(chunk))

      expect(first).toBe('<think>First thought')
      expect(second).toBe('</think>')
      expect(third).toBe('<think>Second thought')
    })
  })
})
|
||||||
@ -5,6 +5,11 @@ import {
|
|||||||
chatCompletionRequestMessage,
|
chatCompletionRequestMessage,
|
||||||
} from '@janhq/core'
|
} from '@janhq/core'
|
||||||
|
|
||||||
|
/**
 * Pick the reasoning text carried by a message or streaming delta.
 *
 * `reasoning_content` is preferred; the legacy `reasoning` field is the
 * fallback. Empty strings are treated like missing values so that an empty
 * preferred field cannot mask a populated fallback, and so that "no reasoning"
 * is always reported as null rather than as ''.
 *
 * @param obj - Message or delta to inspect; may be null or undefined.
 * @returns The first non-empty reasoning string, or null when there is none.
 */
function getReasoning(
  obj:
    | { reasoning_content?: string | null; reasoning?: string | null }
    | null
    | undefined
): string | null {
  // `||` (not `??`) on purpose: '' must fall through to the next candidate.
  return obj?.reasoning_content || obj?.reasoning || null
}
|
||||||
|
|
||||||
// Extract reasoning from a message (for completed responses)
|
// Extract reasoning from a message (for completed responses)
|
||||||
export function extractReasoningFromMessage(
|
export function extractReasoningFromMessage(
|
||||||
message: chatCompletionRequestMessage | ChatCompletionMessage
|
message: chatCompletionRequestMessage | ChatCompletionMessage
|
||||||
@ -12,7 +17,7 @@ export function extractReasoningFromMessage(
|
|||||||
if (!message) return null
|
if (!message) return null
|
||||||
|
|
||||||
const extendedMessage = message as chatCompletionRequestMessage
|
const extendedMessage = message as chatCompletionRequestMessage
|
||||||
return extendedMessage.reasoning || null
|
return getReasoning(extendedMessage)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract reasoning from a chunk (for streaming responses)
|
// Extract reasoning from a chunk (for streaming responses)
|
||||||
@ -22,7 +27,7 @@ function extractReasoningFromChunk(
|
|||||||
if (!chunk.choices?.[0]?.delta) return null
|
if (!chunk.choices?.[0]?.delta) return null
|
||||||
|
|
||||||
const delta = chunk.choices[0].delta as chatCompletionRequestMessage
|
const delta = chunk.choices[0].delta as chatCompletionRequestMessage
|
||||||
const reasoning = delta.reasoning
|
const reasoning = getReasoning(delta)
|
||||||
|
|
||||||
// Return null for falsy values, non-strings, or whitespace-only strings
|
// Return null for falsy values, non-strings, or whitespace-only strings
|
||||||
if (!reasoning || typeof reasoning !== 'string' || !reasoning.trim())
|
if (!reasoning || typeof reasoning !== 'string' || !reasoning.trim())
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user