chore: re enable reasoning_content in backend (#6228)
* chore: re enable reasoning_content in backend
* chore: handle reasoning_content
* chore: refactor get reasoning content
* chore: update PR review

---------

Co-authored-by: Faisal Amir <urmauur@gmail.com>
This commit is contained in:
parent 43ab6dfc65
commit 906b87022d
@@ -7,6 +7,7 @@ export interface chatCompletionRequestMessage {
  role: 'system' | 'user' | 'assistant' | 'tool'
  content: string | null | Content[] // Content can be a string OR an array of content parts
  reasoning?: string | null // Some models return reasoning in completed responses
  reasoning_content?: string | null // Some models return reasoning in completed responses
  name?: string
  tool_calls?: any[] // Simplified
  tool_call_id?: string
}
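For context, a minimal sketch (values and variable name are hypothetical, not from this diff) of an assistant message carrying the new field next to the legacy one:

// Illustrative only: reasoning_content is the newer field, reasoning the legacy one some models still emit.
const exampleMessage: chatCompletionRequestMessage = {
  role: 'assistant',
  content: 'The answer is 42.',
  reasoning_content: 'Multiply 6 by 7 to get 42.',
  reasoning: 'Multiply 6 by 7 to get 42.',
}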
@@ -274,7 +275,7 @@ export abstract class AIEngine extends BaseExtension {

  /**
   * Check if a tool is supported by the model
   * @param modelId
   */
  abstract isToolSupported(modelId: string): Promise<boolean>
}

@@ -1228,7 +1228,6 @@ export default class llamacpp_extension extends AIEngine {
      modelConfig.model_path,
    ])
    args.push('--jinja')
    args.push('--reasoning-format', 'none')
    args.push('-m', modelPath)
    // For overriding tensor buffer type, useful where
    // massive MOE models can be made faster by keeping attention on the GPU

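For illustration, a minimal sketch (variable name and model path are placeholders) of the argument list the pushes shown above assemble:

// Illustrative only: mirrors the args.push calls in this hunk with a hypothetical model path.
const exampleArgs: string[] = []
exampleArgs.push('--jinja')
exampleArgs.push('--reasoning-format', 'none')
exampleArgs.push('-m', '/models/example.gguf') // placeholder path
// exampleArgs -> ['--jinja', '--reasoning-format', 'none', '-m', '/models/example.gguf']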
web-app/src/utils/__tests__/reasoning.test.ts (new file, 381 lines)
@@ -0,0 +1,381 @@
import { describe, it, expect, beforeEach } from 'vitest'
import {
  ReasoningProcessor,
  extractReasoningFromMessage,
} from '../reasoning'
import { CompletionResponseChunk } from 'token.js'
import { chatCompletionChunk, chatCompletionRequestMessage } from '@janhq/core'

describe('extractReasoningFromMessage', () => {
  it('should extract reasoning from message with reasoning_content property', () => {
    const message = {
      role: 'assistant' as const,
      content: 'Hello',
      reasoning_content: 'This is my reasoning content',
    }

    const result = extractReasoningFromMessage(message)
    expect(result).toBe('This is my reasoning content')
  })

  it('should extract reasoning from message with legacy reasoning property', () => {
    const message = {
      role: 'assistant' as const,
      content: 'Hello',
      reasoning: 'This is my reasoning',
    }

    const result = extractReasoningFromMessage(message)
    expect(result).toBe('This is my reasoning')
  })

  it('should prefer reasoning_content over reasoning property', () => {
    const message = {
      role: 'assistant' as const,
      content: 'Hello',
      reasoning_content: 'New reasoning content',
      reasoning: 'Old reasoning',
    }

    const result = extractReasoningFromMessage(message)
    expect(result).toBe('New reasoning content')
  })

  it('should return null for message without reasoning', () => {
    const message = {
      role: 'assistant' as const,
      content: 'Hello',
    }

    const result = extractReasoningFromMessage(message)
    expect(result).toBeNull()
  })

  it('should return null for null/undefined message', () => {
    expect(extractReasoningFromMessage(null as any)).toBeNull()
    expect(extractReasoningFromMessage(undefined as any)).toBeNull()
  })
})

describe('ReasoningProcessor', () => {
  let processor: ReasoningProcessor

  beforeEach(() => {
    processor = new ReasoningProcessor()
  })

  describe('processReasoningChunk', () => {
    it('should start reasoning with opening think tag using reasoning_content', () => {
      const chunk: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning_content: 'Let me think about this...',
          },
        }],
      }

      const result = processor.processReasoningChunk(chunk)
      expect(result).toBe('<think>Let me think about this...')
      expect(processor.isReasoningInProgress()).toBe(true)
    })

    it('should start reasoning with opening think tag using legacy reasoning', () => {
      const chunk: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning: 'Let me think about this...',
          },
        }],
      }

      const result = processor.processReasoningChunk(chunk)
      expect(result).toBe('<think>Let me think about this...')
      expect(processor.isReasoningInProgress()).toBe(true)
    })

    it('should continue reasoning without opening tag', () => {
      // Start reasoning
      const chunk1: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning_content: 'First part',
          },
        }],
      }
      processor.processReasoningChunk(chunk1)

      // Continue reasoning
      const chunk2: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning_content: ' second part',
          },
        }],
      }

      const result = processor.processReasoningChunk(chunk2)
      expect(result).toBe(' second part')
      expect(processor.isReasoningInProgress()).toBe(true)
    })

    it('should end reasoning when content starts', () => {
      // Start reasoning
      const chunk1: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning_content: 'Thinking...',
          },
        }],
      }
      processor.processReasoningChunk(chunk1)

      // End reasoning with content
      const chunk2: chatCompletionChunk = {
        choices: [{
          delta: {
            content: 'Now I respond',
          },
        }],
      }

      const result = processor.processReasoningChunk(chunk2)
      expect(result).toBe('</think>')
      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should handle empty reasoning chunks', () => {
      const chunk: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning_content: '',
          },
        }],
      }

      const result = processor.processReasoningChunk(chunk)
      expect(result).toBe('')
      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should handle whitespace-only reasoning', () => {
      const chunk: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning_content: ' \n ',
          },
        }],
      }

      const result = processor.processReasoningChunk(chunk)
      expect(result).toBe('')
      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should handle non-string reasoning', () => {
      const chunk: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning_content: null as any,
          },
        }],
      }

      const result = processor.processReasoningChunk(chunk)
      expect(result).toBe('')
      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should handle chunk without choices', () => {
      const chunk: chatCompletionChunk = {
        choices: undefined as any,
      }

      const result = processor.processReasoningChunk(chunk)
      expect(result).toBe('')
      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should handle chunk without delta', () => {
      const chunk: chatCompletionChunk = {
        choices: [{
          delta: undefined as any,
        }],
      }

      const result = processor.processReasoningChunk(chunk)
      expect(result).toBe('')
      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should handle content without active reasoning', () => {
      const chunk: chatCompletionChunk = {
        choices: [{
          delta: {
            content: 'Regular content',
          },
        }],
      }

      const result = processor.processReasoningChunk(chunk)
      expect(result).toBe('')
      expect(processor.isReasoningInProgress()).toBe(false)
    })
  })

  describe('finalize', () => {
    it('should close reasoning if still active', () => {
      // Start reasoning
      const chunk: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning_content: 'Unfinished thinking',
          },
        }],
      }
      processor.processReasoningChunk(chunk)

      const result = processor.finalize()
      expect(result).toBe('</think>')
      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should return empty string if no active reasoning', () => {
      const result = processor.finalize()
      expect(result).toBe('')
      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should handle multiple finalize calls', () => {
      // Start reasoning
      const chunk: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning_content: 'Thinking',
          },
        }],
      }
      processor.processReasoningChunk(chunk)

      // First finalize
      const result1 = processor.finalize()
      expect(result1).toBe('</think>')

      // Second finalize should return empty
      const result2 = processor.finalize()
      expect(result2).toBe('')
    })
  })

  describe('isReasoningInProgress', () => {
    it('should track reasoning state correctly', () => {
      expect(processor.isReasoningInProgress()).toBe(false)

      // Start reasoning
      const chunk1: chatCompletionChunk = {
        choices: [{
          delta: {
            reasoning_content: 'Start thinking',
          },
        }],
      }
      processor.processReasoningChunk(chunk1)
      expect(processor.isReasoningInProgress()).toBe(true)

      // End with content
      const chunk2: chatCompletionChunk = {
        choices: [{
          delta: {
            content: 'Response',
          },
        }],
      }
      processor.processReasoningChunk(chunk2)
      expect(processor.isReasoningInProgress()).toBe(false)
    })
  })

  describe('integration scenarios', () => {
    it('should handle complete reasoning flow', () => {
      const chunks: chatCompletionChunk[] = [
        {
          choices: [{
            delta: { reasoning_content: 'Let me think' },
          }],
        },
        {
          choices: [{
            delta: { reasoning_content: ' about this problem' },
          }],
        },
        {
          choices: [{
            delta: { reasoning_content: ' step by step.' },
          }],
        },
        {
          choices: [{
            delta: { content: 'Based on my analysis,' },
          }],
        },
        {
          choices: [{
            delta: { content: ' the answer is 42.' },
          }],
        },
      ]

      const results = chunks.map(chunk => processor.processReasoningChunk(chunk))

      expect(results[0]).toBe('<think>Let me think')
      expect(results[1]).toBe(' about this problem')
      expect(results[2]).toBe(' step by step.')
      expect(results[3]).toBe('</think>')
      expect(results[4]).toBe('')

      expect(processor.isReasoningInProgress()).toBe(false)
    })

    it('should handle reasoning without content', () => {
      const chunk: chatCompletionChunk = {
        choices: [{
          delta: { reasoning_content: 'Only reasoning, no content' },
        }],
      }

      const result1 = processor.processReasoningChunk(chunk)
      expect(result1).toBe('<think>Only reasoning, no content')

      const result2 = processor.finalize()
      expect(result2).toBe('</think>')
    })

    it('should handle mixed reasoning and content chunks', () => {
      // Reasoning then content then reasoning again (edge case)
      const chunk1: chatCompletionChunk = {
        choices: [{
          delta: { reasoning_content: 'First thought' },
        }],
      }

      const chunk2: chatCompletionChunk = {
        choices: [{
          delta: { content: 'Some content' },
        }],
      }

      const chunk3: chatCompletionChunk = {
        choices: [{
          delta: { reasoning_content: 'Second thought' },
        }],
      }

      const result1 = processor.processReasoningChunk(chunk1)
      const result2 = processor.processReasoningChunk(chunk2)
      const result3 = processor.processReasoningChunk(chunk3)

      expect(result1).toBe('<think>First thought')
      expect(result2).toBe('</think>')
      expect(result3).toBe('<think>Second thought')
    })
  })
})
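The tests above pin down the expected streaming behaviour: open a <think> tag on the first non-empty reasoning token, stream subsequent reasoning tokens through unchanged, and close the tag when regular content arrives or on finalize(). As a reading aid, here is a minimal sketch of a processor that would satisfy them; the class name is made up for this sketch, and the shipped implementation lives in web-app/src/utils/reasoning.ts and may differ in detail.

// Minimal sketch consistent with the tests above; not the shipped implementation.
import { chatCompletionChunk, chatCompletionRequestMessage } from '@janhq/core'

export class ReasoningProcessorSketch {
  private inReasoning = false

  processReasoningChunk(chunk: chatCompletionChunk): string {
    const delta = chunk?.choices?.[0]?.delta as chatCompletionRequestMessage | undefined
    if (!delta) return ''

    const reasoning = delta.reasoning_content ?? delta.reasoning ?? null
    if (typeof reasoning === 'string' && reasoning.trim()) {
      // Emit the opening tag only once, on the first non-empty reasoning token.
      const prefix = this.inReasoning ? '' : '<think>'
      this.inReasoning = true
      return prefix + reasoning
    }

    // Regular content arriving while reasoning is open closes the think block.
    if (this.inReasoning && typeof delta.content === 'string' && delta.content) {
      this.inReasoning = false
      return '</think>'
    }

    return ''
  }

  finalize(): string {
    // Close an unterminated think block; subsequent calls return ''.
    if (!this.inReasoning) return ''
    this.inReasoning = false
    return '</think>'
  }

  isReasoningInProgress(): boolean {
    return this.inReasoning
  }
}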
@@ -5,6 +5,11 @@ import {
  chatCompletionRequestMessage,
} from '@janhq/core'

// Helper function to get reasoning content from an object
function getReasoning(obj: { reasoning_content?: string | null; reasoning?: string | null } | null | undefined): string | null {
  return obj?.reasoning_content ?? obj?.reasoning ?? null
}

// Extract reasoning from a message (for completed responses)
export function extractReasoningFromMessage(
  message: chatCompletionRequestMessage | ChatCompletionMessage
@@ -12,7 +17,7 @@ export function extractReasoningFromMessage(
  if (!message) return null

  const extendedMessage = message as chatCompletionRequestMessage
  return extendedMessage.reasoning || null
  return getReasoning(extendedMessage)
}

// Extract reasoning from a chunk (for streaming responses)
@@ -22,7 +27,7 @@ function extractReasoningFromChunk(
  if (!chunk.choices?.[0]?.delta) return null

  const delta = chunk.choices[0].delta as chatCompletionRequestMessage
  const reasoning = delta.reasoning
  const reasoning = getReasoning(delta)

  // Return null for falsy values, non-strings, or whitespace-only strings
  if (!reasoning || typeof reasoning !== 'string' || !reasoning.trim())

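For illustration, a hedged usage sketch (the message object and its values are hypothetical) of the updated helper; per the tests above, reasoning_content takes precedence over the legacy reasoning field:

// Illustrative only: hypothetical non-streaming response message.
const completed = {
  role: 'assistant' as const,
  content: 'The answer is 42.',
  reasoning_content: 'Compute 6 * 7.',
  reasoning: 'older field, ignored when reasoning_content is present',
}
extractReasoningFromMessage(completed) // -> 'Compute 6 * 7.'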