diff --git a/web-app/src/containers/Capabilities.tsx b/web-app/src/containers/Capabilities.tsx index e2e09030a..07f3bf0d6 100644 --- a/web-app/src/containers/Capabilities.tsx +++ b/web-app/src/containers/Capabilities.tsx @@ -10,6 +10,7 @@ import { IconAtom, IconWorld, IconCodeCircle2, + IconSparkles, } from '@tabler/icons-react' import { Fragment } from 'react/jsx-runtime' @@ -29,6 +30,8 @@ const Capabilities = ({ capabilities }: CapabilitiesProps) => { icon = } else if (capability === 'tools') { icon = + } else if (capability === 'proactive') { + icon = } else if (capability === 'reasoning') { icon = } else if (capability === 'embeddings') { @@ -54,7 +57,11 @@ const Capabilities = ({ capabilities }: CapabilitiesProps) => {

- {capability === 'web_search' ? 'Web Search' : capability} + {capability === 'web_search' + ? 'Web Search' + : capability === 'proactive' + ? 'Proactive' + : capability}

diff --git a/web-app/src/containers/ModelInfoHoverCard.tsx b/web-app/src/containers/ModelInfoHoverCard.tsx index 63f5f3183..25edff914 100644 --- a/web-app/src/containers/ModelInfoHoverCard.tsx +++ b/web-app/src/containers/ModelInfoHoverCard.tsx @@ -152,12 +152,19 @@ export const ModelInfoHoverCard = ({ {/* Features Section */} - {(model.num_mmproj > 0 || model.tools) && ( + {(model.num_mmproj > 0 || model.tools || (model.num_mmproj > 0 && model.tools)) && (
Features
+ {model.tools && ( +
+ + Tools + +
+ )} {model.num_mmproj > 0 && (
@@ -165,10 +172,10 @@ export const ModelInfoHoverCard = ({
)} - {model.tools && ( + {model.num_mmproj > 0 && model.tools && (
- Tools + Proactive
)} diff --git a/web-app/src/containers/__tests__/Capabilities.test.tsx b/web-app/src/containers/__tests__/Capabilities.test.tsx new file mode 100644 index 000000000..a5e60c600 --- /dev/null +++ b/web-app/src/containers/__tests__/Capabilities.test.tsx @@ -0,0 +1,124 @@ +import { describe, it, expect, vi } from 'vitest' +import { render, screen } from '@testing-library/react' +import Capabilities from '../Capabilities' + +// Mock Tooltip components +vi.mock('@/components/ui/tooltip', () => ({ + Tooltip: ({ children }: { children: React.ReactNode }) =>
{children}
, + TooltipContent: ({ children }: { children: React.ReactNode }) =>
{children}
, + TooltipProvider: ({ children }: { children: React.ReactNode }) =>
{children}
, + TooltipTrigger: ({ children }: { children: React.ReactNode }) =>
{children}
, +})) + +// Mock Tabler icons +vi.mock('@tabler/icons-react', () => ({ + IconEye: () =>
Eye Icon
, + IconTool: () =>
Tool Icon
, + IconSparkles: () =>
Sparkles Icon
, + IconAtom: () =>
Atom Icon
, + IconWorld: () =>
World Icon
, + IconCodeCircle2: () =>
Code Icon
, +})) + +describe('Capabilities', () => { + it('should render vision capability with eye icon', () => { + render() + + const eyeIcon = screen.getByTestId('icon-eye') + expect(eyeIcon).toBeInTheDocument() + }) + + it('should render tools capability with tool icon', () => { + render() + + const toolIcon = screen.getByTestId('icon-tool') + expect(toolIcon).toBeInTheDocument() + }) + + it('should render proactive capability with sparkles icon', () => { + render() + + const sparklesIcon = screen.getByTestId('icon-sparkles') + expect(sparklesIcon).toBeInTheDocument() + }) + + it('should render reasoning capability with atom icon', () => { + render() + + const atomIcon = screen.getByTestId('icon-atom') + expect(atomIcon).toBeInTheDocument() + }) + + it('should render web_search capability with world icon', () => { + render() + + const worldIcon = screen.getByTestId('icon-world') + expect(worldIcon).toBeInTheDocument() + }) + + it('should render embeddings capability with code icon', () => { + render() + + const codeIcon = screen.getByTestId('icon-code') + expect(codeIcon).toBeInTheDocument() + }) + + it('should render multiple capabilities', () => { + render() + + expect(screen.getByTestId('icon-tool')).toBeInTheDocument() + expect(screen.getByTestId('icon-eye')).toBeInTheDocument() + expect(screen.getByTestId('icon-sparkles')).toBeInTheDocument() + }) + + it('should render all capabilities in correct order', () => { + render() + + expect(screen.getByTestId('icon-tool')).toBeInTheDocument() + expect(screen.getByTestId('icon-eye')).toBeInTheDocument() + expect(screen.getByTestId('icon-sparkles')).toBeInTheDocument() + expect(screen.getByTestId('icon-atom')).toBeInTheDocument() + expect(screen.getByTestId('icon-world')).toBeInTheDocument() + expect(screen.getByTestId('icon-code')).toBeInTheDocument() + }) + + it('should handle empty capabilities array', () => { + const { container } = render() + + expect(container.querySelector('[data-testid^="icon-"]')).not.toBeInTheDocument() + }) + + it('should handle unknown capabilities gracefully', () => { + const { container } = render() + + expect(container).toBeInTheDocument() + }) + + it('should display proactive tooltip with correct text', () => { + render() + + // The tooltip content should be 'Proactive' + expect(screen.getByTestId('icon-sparkles')).toBeInTheDocument() + }) + + it('should render proactive icon between tools/vision and reasoning', () => { + const { container } = render() + + // All icons should be rendered + expect(screen.getByTestId('icon-tool')).toBeInTheDocument() + expect(screen.getByTestId('icon-eye')).toBeInTheDocument() + expect(screen.getByTestId('icon-sparkles')).toBeInTheDocument() + expect(screen.getByTestId('icon-atom')).toBeInTheDocument() + + expect(container.querySelector('[data-testid="icon-sparkles"]')).toBeInTheDocument() + }) + + it('should apply correct CSS classes to proactive icon', () => { + render() + + const sparklesIcon = screen.getByTestId('icon-sparkles') + expect(sparklesIcon).toBeInTheDocument() + // Icon should have size-3.5 class (same as tools, reasoning, etc.) + expect(sparklesIcon.parentElement).toBeInTheDocument() + }) +}) diff --git a/web-app/src/containers/__tests__/ChatInput.test.tsx b/web-app/src/containers/__tests__/ChatInput.test.tsx index 642313ec7..a1c24d3e3 100644 --- a/web-app/src/containers/__tests__/ChatInput.test.tsx +++ b/web-app/src/containers/__tests__/ChatInput.test.tsx @@ -437,4 +437,31 @@ describe('ChatInput', () => { expect(() => renderWithRouter()).not.toThrow() }) }) + + describe('Proactive Mode', () => { + it('should render ChatInput with proactive capable model', async () => { + await act(async () => { + renderWithRouter() + }) + + expect(screen.getByTestId('chat-input')).toBeInTheDocument() + }) + + it('should handle proactive capability detection', async () => { + await act(async () => { + renderWithRouter() + }) + + expect(screen.getByTestId('chat-input')).toBeInTheDocument() + }) + + it('should work with models that have multiple capabilities', async () => { + await act(async () => { + renderWithRouter() + }) + + expect(screen.getByTestId('chat-input')).toBeInTheDocument() + }) + + }) }) diff --git a/web-app/src/containers/__tests__/EditModel.test.tsx b/web-app/src/containers/__tests__/EditModel.test.tsx index 6c0dfd059..345bc91d6 100644 --- a/web-app/src/containers/__tests__/EditModel.test.tsx +++ b/web-app/src/containers/__tests__/EditModel.test.tsx @@ -82,6 +82,7 @@ vi.mock('@tabler/icons-react', () => ({ IconEye: () =>
, IconTool: () =>
, IconLoader2: () =>
, + IconSparkles: () =>
, })) describe('DialogEditModel - Basic Component Tests', () => { @@ -189,7 +190,7 @@ describe('DialogEditModel - Basic Component Tests', () => { { id: 'test-model.gguf', displayName: 'Test Model', - capabilities: ['vision', 'tools'], + capabilities: ['vision', 'tools', 'proactive'], }, ], settings: [], @@ -226,7 +227,7 @@ describe('DialogEditModel - Basic Component Tests', () => { { id: 'test-model.gguf', displayName: 'Test Model', - capabilities: ['vision', 'tools', 'completion', 'embeddings', 'web_search', 'reasoning'], + capabilities: ['vision', 'tools', 'proactive', 'completion', 'embeddings', 'web_search', 'reasoning'], }, ], settings: [], @@ -240,7 +241,7 @@ describe('DialogEditModel - Basic Component Tests', () => { ) // Component should render without errors even with extra capabilities - // The capabilities helper should only extract vision and tools + // The capabilities helper should only extract vision, tools, and proactive expect(container).toBeInTheDocument() }) }) diff --git a/web-app/src/containers/dialogs/EditModel.tsx b/web-app/src/containers/dialogs/EditModel.tsx index f7dec06eb..78f6e93c2 100644 --- a/web-app/src/containers/dialogs/EditModel.tsx +++ b/web-app/src/containers/dialogs/EditModel.tsx @@ -17,6 +17,7 @@ import { IconTool, IconAlertTriangle, IconLoader2, + IconSparkles, } from '@tabler/icons-react' import { useState, useEffect } from 'react' import { useTranslation } from '@/i18n/react-i18next-compat' @@ -45,6 +46,7 @@ export const DialogEditModel = ({ const [capabilities, setCapabilities] = useState>({ vision: false, tools: false, + proactive: false, }) // Initialize with the provided model ID or the first model if available @@ -67,6 +69,7 @@ export const DialogEditModel = ({ const capabilitiesToObject = (capabilitiesList: string[]) => ({ vision: capabilitiesList.includes('vision'), tools: capabilitiesList.includes('tools'), + proactive: capabilitiesList.includes('proactive'), }) // Initialize capabilities and display name from selected model @@ -268,6 +271,23 @@ export const DialogEditModel = ({ disabled={isLoading} />
+ +
+
+ + + {t('providers:editModel.proactive')} + +
+ + handleCapabilityChange('proactive', checked) + } + disabled={isLoading || !(capabilities.tools && capabilities.vision)} + /> +
diff --git a/web-app/src/hooks/__tests__/useChat.test.ts b/web-app/src/hooks/__tests__/useChat.test.ts index e87191fb6..c7c576cf0 100644 --- a/web-app/src/hooks/__tests__/useChat.test.ts +++ b/web-app/src/hooks/__tests__/useChat.test.ts @@ -170,6 +170,7 @@ vi.mock('@/lib/completion', () => ({ sendCompletion: vi.fn(), postMessageProcessing: vi.fn(), isCompletionResponse: vi.fn(), + captureProactiveScreenshots: vi.fn(() => Promise.resolve([])), })) vi.mock('@/lib/messages', () => ({ @@ -225,4 +226,26 @@ describe('useChat', () => { expect(result.current).toBeDefined() }) + + describe('Proactive Mode', () => { + it('should detect proactive mode when model has proactive capability', () => { + const { result } = renderHook(() => useChat()) + + expect(result.current).toBeDefined() + expect(typeof result.current).toBe('function') + }) + + it('should handle model with tools, vision, and proactive capabilities', () => { + const { result } = renderHook(() => useChat()) + + expect(result.current).toBeDefined() + }) + + it('should work with models that have proactive capability', () => { + const { result } = renderHook(() => useChat()) + + expect(result.current).toBeDefined() + expect(typeof result.current).toBe('function') + }) + }) }) diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts index 15d06f506..9bc550607 100644 --- a/web-app/src/hooks/useChat.ts +++ b/web-app/src/hooks/useChat.ts @@ -16,6 +16,7 @@ import { newUserThreadContent, postMessageProcessing, sendCompletion, + captureProactiveScreenshots, } from '@/lib/completion' import { CompletionMessagesBuilder } from '@/lib/messages' import { renderInstructions } from '@/lib/instructionTemplate' @@ -419,6 +420,27 @@ export const useChat = () => { }) : [] + // Check if proactive mode is enabled + const isProactiveMode = selectedModel?.capabilities?.includes('proactive') ?? false + + // Proactive mode: Capture initial screenshot/snapshot before first LLM call + if (isProactiveMode && availableTools.length > 0 && !abortController.signal.aborted) { + console.log('Proactive mode: Capturing initial screenshots before LLM call') + try { + const initialScreenshots = await captureProactiveScreenshots(abortController) + + // Add initial screenshots to builder + for (const screenshot of initialScreenshots) { + // Generate unique tool call ID for initial screenshot + const proactiveToolCallId = `proactive_initial_${Date.now()}_${Math.random()}` + builder.addToolMessage(screenshot, proactiveToolCallId) + console.log('Initial proactive screenshot added to context') + } + } catch (e) { + console.warn('Failed to capture initial proactive screenshots:', e) + } + } + let assistantLoopSteps = 0 while ( @@ -694,6 +716,10 @@ export const useChat = () => { ) builder.addAssistantMessage(accumulatedText, undefined, toolCalls) + + // Check if proactive mode is enabled for this model + const isProactiveMode = selectedModel?.capabilities?.includes('proactive') ?? false + const updatedMessage = await postMessageProcessing( toolCalls, builder, @@ -701,7 +727,8 @@ export const useChat = () => { abortController, useToolApproval.getState().approvedTools, allowAllMCPPermissions ? undefined : showApprovalModal, - allowAllMCPPermissions + allowAllMCPPermissions, + isProactiveMode ) addMessage(updatedMessage ?? finalContent) updateStreamingContent(emptyThreadContent) diff --git a/web-app/src/lib/__tests__/completion.test.ts b/web-app/src/lib/__tests__/completion.test.ts index 2b3ccaec7..f8fed4fec 100644 --- a/web-app/src/lib/__tests__/completion.test.ts +++ b/web-app/src/lib/__tests__/completion.test.ts @@ -1,5 +1,5 @@ import { describe, it, expect, vi, beforeEach } from 'vitest' -import { +import { newUserThreadContent, newAssistantThreadContent, emptyThreadContent, @@ -8,7 +8,8 @@ import { stopModel, normalizeTools, extractToolCall, - postMessageProcessing + postMessageProcessing, + captureProactiveScreenshots } from '../completion' // Mock dependencies @@ -72,6 +73,54 @@ vi.mock('../extension', () => ({ ExtensionManager: {}, })) +vi.mock('@/hooks/useServiceHub', () => ({ + getServiceHub: vi.fn(() => ({ + mcp: vi.fn(() => ({ + getTools: vi.fn(() => Promise.resolve([])), + callToolWithCancellation: vi.fn(() => ({ + promise: Promise.resolve({ + content: [{ type: 'text', text: 'mock result' }], + error: '', + }), + cancel: vi.fn(), + })), + })), + rag: vi.fn(() => ({ + getToolNames: vi.fn(() => Promise.resolve([])), + callTool: vi.fn(() => Promise.resolve({ + content: [{ type: 'text', text: 'mock rag result' }], + error: '', + })), + })), + })), +})) + +vi.mock('@/hooks/useAttachments', () => ({ + useAttachments: { + getState: vi.fn(() => ({ enabled: true })), + }, +})) + +vi.mock('@/hooks/useAppState', () => ({ + useAppState: { + getState: vi.fn(() => ({ + setCancelToolCall: vi.fn(), + })), + }, +})) + +vi.mock('@/lib/platform/const', () => ({ + PlatformFeatures: { + ATTACHMENTS: true, + }, +})) + +vi.mock('@/lib/platform/types', () => ({ + PlatformFeature: { + ATTACHMENTS: 'ATTACHMENTS', + }, +})) + describe('completion.ts', () => { beforeEach(() => { vi.clearAllMocks() @@ -187,4 +236,448 @@ describe('completion.ts', () => { expect(result.length).toBe(0) }) }) + + describe('Proactive Mode - Browser MCP Tool Detection', () => { + // We need to access the private function, so we'll test it through postMessageProcessing + it('should detect browser tool names with "browser" prefix', async () => { + const { getServiceHub } = await import('@/hooks/useServiceHub') + const mockGetTools = vi.fn(() => Promise.resolve([])) + const mockMcp = { + getTools: mockGetTools, + callToolWithCancellation: vi.fn(() => ({ + promise: Promise.resolve({ content: [{ type: 'text', text: 'result' }], error: '' }), + cancel: vi.fn(), + })) + } + vi.mocked(getServiceHub).mockReturnValue({ + mcp: () => mockMcp, + rag: () => ({ getToolNames: () => Promise.resolve([]) }) + } as any) + + const calls = [{ + id: 'call_1', + type: 'function' as const, + function: { name: 'browserbase_navigate', arguments: '{"url": "test.com"}' } + }] + const builder = { + addToolMessage: vi.fn(), + getMessages: vi.fn(() => []) + } as any + const message = { thread_id: 'test-thread', metadata: {} } as any + const abortController = new AbortController() + + await postMessageProcessing( + calls, + builder, + message, + abortController, + {}, + undefined, + false, + true // isProactiveMode = true + ) + + // Verify tool was executed + expect(mockMcp.callToolWithCancellation).toHaveBeenCalled() + }) + + it('should detect browserbase tools', async () => { + const { getServiceHub } = await import('@/hooks/useServiceHub') + const mockCallTool = vi.fn(() => ({ + promise: Promise.resolve({ content: [{ type: 'text', text: 'result' }], error: '' }), + cancel: vi.fn(), + })) + vi.mocked(getServiceHub).mockReturnValue({ + mcp: () => ({ + getTools: () => Promise.resolve([]), + callToolWithCancellation: mockCallTool + }), + rag: () => ({ getToolNames: () => Promise.resolve([]) }) + } as any) + + const calls = [{ + id: 'call_1', + type: 'function' as const, + function: { name: 'browserbase_screenshot', arguments: '{}' } + }] + const builder = { + addToolMessage: vi.fn(), + getMessages: vi.fn(() => []) + } as any + const message = { thread_id: 'test-thread', metadata: {} } as any + const abortController = new AbortController() + + await postMessageProcessing(calls, builder, message, abortController, {}, undefined, false, true) + + expect(mockCallTool).toHaveBeenCalled() + }) + + it('should detect multi_browserbase tools', async () => { + const { getServiceHub } = await import('@/hooks/useServiceHub') + const mockCallTool = vi.fn(() => ({ + promise: Promise.resolve({ content: [{ type: 'text', text: 'result' }], error: '' }), + cancel: vi.fn(), + })) + vi.mocked(getServiceHub).mockReturnValue({ + mcp: () => ({ + getTools: () => Promise.resolve([]), + callToolWithCancellation: mockCallTool + }), + rag: () => ({ getToolNames: () => Promise.resolve([]) }) + } as any) + + const calls = [{ + id: 'call_1', + type: 'function' as const, + function: { name: 'multi_browserbase_stagehand_navigate', arguments: '{}' } + }] + const builder = { + addToolMessage: vi.fn(), + getMessages: vi.fn(() => []) + } as any + const message = { thread_id: 'test-thread', metadata: {} } as any + const abortController = new AbortController() + + await postMessageProcessing(calls, builder, message, abortController, {}, undefined, false, true) + + expect(mockCallTool).toHaveBeenCalled() + }) + + it('should not treat non-browser tools as browser tools', async () => { + const { getServiceHub } = await import('@/hooks/useServiceHub') + const mockGetTools = vi.fn(() => Promise.resolve([])) + vi.mocked(getServiceHub).mockReturnValue({ + mcp: () => ({ + getTools: mockGetTools, + callToolWithCancellation: vi.fn(() => ({ + promise: Promise.resolve({ content: [{ type: 'text', text: 'result' }], error: '' }), + cancel: vi.fn(), + })) + }), + rag: () => ({ getToolNames: () => Promise.resolve([]) }) + } as any) + + const calls = [{ + id: 'call_1', + type: 'function' as const, + function: { name: 'fetch_url', arguments: '{"url": "test.com"}' } + }] + const builder = { + addToolMessage: vi.fn(), + getMessages: vi.fn(() => []) + } as any + const message = { thread_id: 'test-thread', metadata: {} } as any + const abortController = new AbortController() + + await postMessageProcessing(calls, builder, message, abortController, {}, undefined, false, true) + + // Proactive screenshots should not be called for non-browser tools + expect(mockGetTools).not.toHaveBeenCalled() + }) + }) + + describe('Proactive Mode - Screenshot Capture', () => { + it('should capture screenshot and snapshot when available', async () => { + const { getServiceHub } = await import('@/hooks/useServiceHub') + const mockScreenshotResult = { + content: [{ type: 'image', data: 'base64screenshot', mimeType: 'image/png' }], + error: '', + } + const mockSnapshotResult = { + content: [{ type: 'text', text: 'snapshot html' }], + error: '', + } + + const mockGetTools = vi.fn(() => Promise.resolve([ + { name: 'browserbase_screenshot', inputSchema: {} }, + { name: 'browserbase_snapshot', inputSchema: {} } + ])) + const mockCallTool = vi.fn() + .mockReturnValueOnce({ + promise: Promise.resolve(mockScreenshotResult), + cancel: vi.fn(), + }) + .mockReturnValueOnce({ + promise: Promise.resolve(mockSnapshotResult), + cancel: vi.fn(), + }) + + vi.mocked(getServiceHub).mockReturnValue({ + mcp: () => ({ + getTools: mockGetTools, + callToolWithCancellation: mockCallTool + }) + } as any) + + const abortController = new AbortController() + const results = await captureProactiveScreenshots(abortController) + + expect(results).toHaveLength(2) + expect(results[0]).toEqual(mockScreenshotResult) + expect(results[1]).toEqual(mockSnapshotResult) + expect(mockCallTool).toHaveBeenCalledTimes(2) + }) + + it('should handle missing screenshot tool gracefully', async () => { + const { getServiceHub } = await import('@/hooks/useServiceHub') + const mockGetTools = vi.fn(() => Promise.resolve([ + { name: 'some_other_tool', inputSchema: {} } + ])) + + vi.mocked(getServiceHub).mockReturnValue({ + mcp: () => ({ + getTools: mockGetTools, + callToolWithCancellation: vi.fn() + }) + } as any) + + const abortController = new AbortController() + const results = await captureProactiveScreenshots(abortController) + + expect(results).toHaveLength(0) + }) + + it('should handle screenshot capture errors gracefully', async () => { + const { getServiceHub } = await import('@/hooks/useServiceHub') + const mockGetTools = vi.fn(() => Promise.resolve([ + { name: 'browserbase_screenshot', inputSchema: {} } + ])) + const mockCallTool = vi.fn(() => ({ + promise: Promise.reject(new Error('Screenshot failed')), + cancel: vi.fn(), + })) + + vi.mocked(getServiceHub).mockReturnValue({ + mcp: () => ({ + getTools: mockGetTools, + callToolWithCancellation: mockCallTool + }) + } as any) + + const abortController = new AbortController() + const results = await captureProactiveScreenshots(abortController) + + // Should return empty array on error, not throw + expect(results).toHaveLength(0) + }) + + it('should respect abort controller', async () => { + const { getServiceHub } = await import('@/hooks/useServiceHub') + const mockGetTools = vi.fn(() => Promise.resolve([ + { name: 'browserbase_screenshot', inputSchema: {} } + ])) + const mockCallTool = vi.fn(() => ({ + promise: new Promise((resolve) => setTimeout(() => resolve({ + content: [{ type: 'image', data: 'base64', mimeType: 'image/png' }], + error: '', + }), 100)), + cancel: vi.fn(), + })) + + vi.mocked(getServiceHub).mockReturnValue({ + mcp: () => ({ + getTools: mockGetTools, + callToolWithCancellation: mockCallTool + }) + } as any) + + const abortController = new AbortController() + abortController.abort() + + const results = await captureProactiveScreenshots(abortController) + + // Should not attempt to capture if already aborted + expect(results).toHaveLength(0) + }) + }) + + describe('Proactive Mode - Screenshot Filtering', () => { + it('should filter out old image_url content from tool messages', () => { + const builder = { + messages: [ + { role: 'user', content: 'Hello' }, + { + role: 'tool', + content: [ + { type: 'text', text: 'Tool result' }, + { type: 'image_url', image_url: { url: '' } } + ], + tool_call_id: 'old_call' + }, + { role: 'assistant', content: 'Response' }, + ] + } + + expect(builder.messages).toHaveLength(3) + }) + }) + + describe('Proactive Mode - Integration', () => { + it('should trigger proactive screenshots after browser tool execution', async () => { + const { getServiceHub } = await import('@/hooks/useServiceHub') + + const mockScreenshotResult = { + content: [{ type: 'image', data: 'proactive_screenshot', mimeType: 'image/png' }], + error: '', + } + + const mockGetTools = vi.fn(() => Promise.resolve([ + { name: 'browserbase_screenshot', inputSchema: {} } + ])) + + let callCount = 0 + const mockCallTool = vi.fn(() => { + callCount++ + if (callCount === 1) { + // First call: the browser tool itself + return { + promise: Promise.resolve({ + content: [{ type: 'text', text: 'navigated to page' }], + error: '', + }), + cancel: vi.fn(), + } + } else { + // Second call: proactive screenshot + return { + promise: Promise.resolve(mockScreenshotResult), + cancel: vi.fn(), + } + } + }) + + vi.mocked(getServiceHub).mockReturnValue({ + mcp: () => ({ + getTools: mockGetTools, + callToolWithCancellation: mockCallTool + }), + rag: () => ({ getToolNames: () => Promise.resolve([]) }) + } as any) + + const calls = [{ + id: 'call_1', + type: 'function' as const, + function: { name: 'browserbase_navigate', arguments: '{"url": "test.com"}' } + }] + const builder = { + addToolMessage: vi.fn(), + getMessages: vi.fn(() => []) + } as any + const message = { thread_id: 'test-thread', metadata: {} } as any + const abortController = new AbortController() + + await postMessageProcessing( + calls, + builder, + message, + abortController, + {}, + undefined, + false, + true + ) + + // Should have called: 1) browser tool, 2) getTools, 3) proactive screenshot + expect(mockCallTool).toHaveBeenCalledTimes(2) + expect(mockGetTools).toHaveBeenCalled() + expect(builder.addToolMessage).toHaveBeenCalledTimes(2) + }) + + it('should not trigger proactive screenshots when mode is disabled', async () => { + const { getServiceHub } = await import('@/hooks/useServiceHub') + + const mockGetTools = vi.fn(() => Promise.resolve([ + { name: 'browserbase_screenshot', inputSchema: {} } + ])) + + const mockCallTool = vi.fn(() => ({ + promise: Promise.resolve({ + content: [{ type: 'text', text: 'navigated' }], + error: '', + }), + cancel: vi.fn(), + })) + + vi.mocked(getServiceHub).mockReturnValue({ + mcp: () => ({ + getTools: mockGetTools, + callToolWithCancellation: mockCallTool + }), + rag: () => ({ getToolNames: () => Promise.resolve([]) }) + } as any) + + const calls = [{ + id: 'call_1', + type: 'function' as const, + function: { name: 'browserbase_navigate', arguments: '{}' } + }] + const builder = { + addToolMessage: vi.fn(), + getMessages: vi.fn(() => []) + } as any + const message = { thread_id: 'test-thread', metadata: {} } as any + const abortController = new AbortController() + + await postMessageProcessing( + calls, + builder, + message, + abortController, + {}, + undefined, + false, + false + ) + + expect(mockCallTool).toHaveBeenCalledTimes(1) + expect(mockGetTools).not.toHaveBeenCalled() + }) + + it('should not trigger proactive screenshots for non-browser tools', async () => { + const { getServiceHub } = await import('@/hooks/useServiceHub') + + const mockGetTools = vi.fn(() => Promise.resolve([])) + const mockCallTool = vi.fn(() => ({ + promise: Promise.resolve({ + content: [{ type: 'text', text: 'fetched data' }], + error: '', + }), + cancel: vi.fn(), + })) + + vi.mocked(getServiceHub).mockReturnValue({ + mcp: () => ({ + getTools: mockGetTools, + callToolWithCancellation: mockCallTool + }), + rag: () => ({ getToolNames: () => Promise.resolve([]) }) + } as any) + + const calls = [{ + id: 'call_1', + type: 'function' as const, + function: { name: 'fetch_url', arguments: '{"url": "test.com"}' } + }] + const builder = { + addToolMessage: vi.fn(), + getMessages: vi.fn(() => []) + } as any + const message = { thread_id: 'test-thread', metadata: {} } as any + const abortController = new AbortController() + + await postMessageProcessing( + calls, + builder, + message, + abortController, + {}, + undefined, + false, + true + ) + + expect(mockCallTool).toHaveBeenCalledTimes(1) + expect(mockGetTools).not.toHaveBeenCalled() + }) + }) }) diff --git a/web-app/src/lib/completion.ts b/web-app/src/lib/completion.ts index 7edb4d3b7..4a90982de 100644 --- a/web-app/src/lib/completion.ts +++ b/web-app/src/lib/completion.ts @@ -396,6 +396,120 @@ export const extractToolCall = ( return calls } +/** + * Helper function to check if a tool call is a browser MCP tool + * @param toolName - The name of the tool + * @returns true if the tool is a browser-related MCP tool + */ +const isBrowserMCPTool = (toolName: string): boolean => { + const browserToolPrefixes = [ + 'browser', + 'browserbase', + 'browsermcp', + 'multi_browserbase', + ] + return browserToolPrefixes.some((prefix) => + toolName.toLowerCase().startsWith(prefix) + ) +} + +/** + * Helper function to capture screenshot and snapshot proactively + * @param abortController - The abort controller for cancellation + * @returns Promise with screenshot and snapshot results + */ +export const captureProactiveScreenshots = async ( + abortController: AbortController +): Promise => { + const results: ToolResult[] = [] + + try { + // Get available tools + const allTools = await getServiceHub().mcp().getTools() + + // Find screenshot and snapshot tools + const screenshotTool = allTools.find((t) => + t.name.toLowerCase().includes('screenshot') + ) + const snapshotTool = allTools.find((t) => + t.name.toLowerCase().includes('snapshot') + ) + + // Capture screenshot if available + if (screenshotTool && !abortController.signal.aborted) { + try { + const { promise } = getServiceHub().mcp().callToolWithCancellation({ + toolName: screenshotTool.name, + arguments: {}, + }) + const screenshotResult = await promise + if (screenshotResult && typeof screenshotResult !== 'string') { + results.push(screenshotResult as ToolResult) + } + } catch (e) { + console.warn('Failed to capture proactive screenshot:', e) + } + } + + // Capture snapshot if available + if (snapshotTool && !abortController.signal.aborted) { + try { + const { promise } = getServiceHub().mcp().callToolWithCancellation({ + toolName: snapshotTool.name, + arguments: {}, + }) + const snapshotResult = await promise + if (snapshotResult && typeof snapshotResult !== 'string') { + results.push(snapshotResult as ToolResult) + } + } catch (e) { + console.warn('Failed to capture proactive snapshot:', e) + } + } + } catch (e) { + console.error('Failed to get MCP tools for proactive capture:', e) + } + + return results +} + +/** + * Helper function to filter out old screenshot/snapshot images from builder messages + * Keeps only the latest proactive screenshots + * @param builder - The completion messages builder + */ +const filterOldProactiveScreenshots = (builder: CompletionMessagesBuilder) => { + const messages = builder.getMessages() + const filteredMessages: any[] = [] + + for (const msg of messages) { + if (msg.role === 'tool') { + // If it's a tool message with array content (multimodal) + if (Array.isArray(msg.content)) { + // Filter out images, keep text only for old tool messages + const textOnly = msg.content.filter( + (part: any) => part.type !== 'image_url' + ) + if (textOnly.length > 0) { + filteredMessages.push({ ...msg, content: textOnly }) + } + } else { + // Keep string content as-is + filteredMessages.push(msg) + } + } else { + // Keep all non-tool messages + filteredMessages.push(msg) + } + } + + // Reconstruct builder with filtered messages + // Note: This is a workaround since CompletionMessagesBuilder doesn't have a setter + // We'll need to access the private messages array + // eslint-disable-next-line no-extra-semi + ;(builder as any).messages = filteredMessages +} + /** * @fileoverview Helper function to process the completion response. * @param calls @@ -405,6 +519,7 @@ export const extractToolCall = ( * @param approvedTools * @param showModal * @param allowAllMCPPermissions + * @param isProactiveMode */ export const postMessageProcessing = async ( calls: ChatCompletionMessageToolCall[], @@ -417,7 +532,8 @@ export const postMessageProcessing = async ( threadId: string, toolParameters?: object ) => Promise, - allowAllMCPPermissions: boolean = false + allowAllMCPPermissions: boolean = false, + isProactiveMode: boolean = false ) => { // Handle completed tool calls if (calls.length) { @@ -473,6 +589,7 @@ export const postMessageProcessing = async ( const toolName = toolCall.function.name const toolArgs = toolCall.function.arguments.length ? toolParameters : {} const isRagTool = ragToolNames.has(toolName) + const isBrowserTool = isBrowserMCPTool(toolName) // Auto-approve RAG tools (local/safe operations), require permission for MCP tools const approved = isRagTool @@ -562,6 +679,27 @@ export const postMessageProcessing = async ( ], } builder.addToolMessage(result as ToolResult, toolCall.id) + + // Proactive mode: Capture screenshot/snapshot after browser tool execution + if (isProactiveMode && isBrowserTool && !abortController.signal.aborted) { + console.log('Proactive mode: Capturing screenshots after browser tool call') + + // Filter out old screenshots before adding new ones + filterOldProactiveScreenshots(builder) + + // Capture new screenshots + const proactiveScreenshots = await captureProactiveScreenshots(abortController) + + // Add proactive screenshots to builder + for (const screenshot of proactiveScreenshots) { + // Generate a unique tool call ID for the proactive screenshot + const proactiveToolCallId = ulid() + builder.addToolMessage(screenshot, proactiveToolCallId) + + console.log('Proactive screenshot captured and added to context') + } + } + // update message metadata } return message diff --git a/web-app/src/locales/de-DE/common.json b/web-app/src/locales/de-DE/common.json index 699c15a08..f79883980 100644 --- a/web-app/src/locales/de-DE/common.json +++ b/web-app/src/locales/de-DE/common.json @@ -80,6 +80,7 @@ "tools": "Werkzeuge", "webSearch": "Web Suche", "reasoning": "Argumentation", + "proactive": "Proaktiv", "selectAModel": "Wähle ein Modell", "noToolsAvailable": "Keine Werkzeuge verfügbar", "noModelsFoundFor": "Keine Modelle gefunden zu \"{{searchValue}}\"", diff --git a/web-app/src/locales/de-DE/providers.json b/web-app/src/locales/de-DE/providers.json index 39c52e047..9f75f4cde 100644 --- a/web-app/src/locales/de-DE/providers.json +++ b/web-app/src/locales/de-DE/providers.json @@ -61,6 +61,7 @@ "capabilities": "Fähigkeiten", "tools": "Werkzeuge", "vision": "Vision", + "proactive": "Proaktiv (Experimentell)", "embeddings": "Einbettungen", "notAvailable": "Noch nicht verfügbar", "warning": { diff --git a/web-app/src/locales/en/common.json b/web-app/src/locales/en/common.json index 026f430e8..950879bf6 100644 --- a/web-app/src/locales/en/common.json +++ b/web-app/src/locales/en/common.json @@ -81,6 +81,7 @@ "tools": "Tools", "webSearch": "Web Search", "reasoning": "Reasoning", + "proactive": "Proactive", "selectAModel": "Select a model", "noToolsAvailable": "No tools available", "noModelsFoundFor": "No models found for \"{{searchValue}}\"", diff --git a/web-app/src/locales/en/providers.json b/web-app/src/locales/en/providers.json index 2683432f9..48eb30e12 100644 --- a/web-app/src/locales/en/providers.json +++ b/web-app/src/locales/en/providers.json @@ -61,6 +61,7 @@ "capabilities": "Capabilities", "tools": "Tools", "vision": "Vision", + "proactive": "Proactive (Experimental)", "embeddings": "Embeddings", "notAvailable": "Not available yet", "warning": { diff --git a/web-app/src/locales/id/common.json b/web-app/src/locales/id/common.json index 77af93d31..7f9bfaeea 100644 --- a/web-app/src/locales/id/common.json +++ b/web-app/src/locales/id/common.json @@ -80,6 +80,7 @@ "tools": "Alat", "webSearch": "Pencarian Web", "reasoning": "Penalaran", + "proactive": "Proaktif", "selectAModel": "Pilih model", "noToolsAvailable": "Tidak ada alat yang tersedia", "noModelsFoundFor": "Tidak ada model yang ditemukan untuk \"{{searchValue}}\"", diff --git a/web-app/src/locales/id/providers.json b/web-app/src/locales/id/providers.json index 5f89d69c6..1679b5b45 100644 --- a/web-app/src/locales/id/providers.json +++ b/web-app/src/locales/id/providers.json @@ -61,6 +61,7 @@ "capabilities": "Kemampuan", "tools": "Alat", "vision": "Visi", + "proactive": "Proaktif (Eksperimental)", "embeddings": "Embedding", "notAvailable": "Belum tersedia", "warning": { diff --git a/web-app/src/locales/pl/common.json b/web-app/src/locales/pl/common.json index ee25f6068..0676a8be3 100644 --- a/web-app/src/locales/pl/common.json +++ b/web-app/src/locales/pl/common.json @@ -80,6 +80,7 @@ "tools": "Narzędzia", "webSearch": "Szukanie w Sieci", "reasoning": "Rozumowanie", + "proactive": "Proaktywny", "selectAModel": "Wybierz Model", "noToolsAvailable": "Brak narzędzi", "noModelsFoundFor": "Brak modeli dla \"{{searchValue}}\"", diff --git a/web-app/src/locales/pl/providers.json b/web-app/src/locales/pl/providers.json index c1c03434e..6100db994 100644 --- a/web-app/src/locales/pl/providers.json +++ b/web-app/src/locales/pl/providers.json @@ -61,6 +61,7 @@ "capabilities": "Możliwości", "tools": "Narzędzia", "vision": "Wizja", + "proactive": "Proaktywny (Eksperymentalny)", "embeddings": "Osadzenia", "notAvailable": "Jeszcze niedostępne", "warning": { diff --git a/web-app/src/locales/vn/common.json b/web-app/src/locales/vn/common.json index 28ddd29a7..6239d9686 100644 --- a/web-app/src/locales/vn/common.json +++ b/web-app/src/locales/vn/common.json @@ -80,6 +80,7 @@ "tools": "Công cụ", "webSearch": "Tìm kiếm trên web", "reasoning": "Lý luận", + "proactive": "Chủ động", "selectAModel": "Chọn một mô hình", "noToolsAvailable": "Không có công cụ nào", "noModelsFoundFor": "Không tìm thấy mô hình nào cho \"{{searchValue}}\"", diff --git a/web-app/src/locales/vn/providers.json b/web-app/src/locales/vn/providers.json index 8c0e6d1b8..adf7e6528 100644 --- a/web-app/src/locales/vn/providers.json +++ b/web-app/src/locales/vn/providers.json @@ -61,6 +61,7 @@ "capabilities": "Khả năng", "tools": "Công cụ", "vision": "Thị giác", + "proactive": "Chủ động (Thử nghiệm)", "embeddings": "Nhúng", "notAvailable": "Chưa có", "warning": { diff --git a/web-app/src/locales/zh-CN/common.json b/web-app/src/locales/zh-CN/common.json index 69b15ac90..7ba859f09 100644 --- a/web-app/src/locales/zh-CN/common.json +++ b/web-app/src/locales/zh-CN/common.json @@ -80,6 +80,7 @@ "tools": "工具", "webSearch": "网页搜索", "reasoning": "推理", + "proactive": "主动模式", "selectAModel": "选择一个模型", "noToolsAvailable": "无可用工具", "noModelsFoundFor": "未找到“{{searchValue}}”的模型", diff --git a/web-app/src/locales/zh-CN/providers.json b/web-app/src/locales/zh-CN/providers.json index 2ca2beb2e..c4e6b03cb 100644 --- a/web-app/src/locales/zh-CN/providers.json +++ b/web-app/src/locales/zh-CN/providers.json @@ -61,6 +61,7 @@ "capabilities": "功能", "tools": "工具", "vision": "视觉", + "proactive": "主动模式(实验性)", "embeddings": "嵌入", "notAvailable": "尚不可用", "warning": { diff --git a/web-app/src/locales/zh-TW/common.json b/web-app/src/locales/zh-TW/common.json index 809ac0cd4..3caf19258 100644 --- a/web-app/src/locales/zh-TW/common.json +++ b/web-app/src/locales/zh-TW/common.json @@ -80,6 +80,7 @@ "tools": "工具", "webSearch": "網路搜尋", "reasoning": "推理", + "proactive": "主動模式", "selectAModel": "選擇一個模型", "noToolsAvailable": "沒有可用的工具", "noModelsFoundFor": "找不到符合「{{searchValue}}」的模型", diff --git a/web-app/src/locales/zh-TW/providers.json b/web-app/src/locales/zh-TW/providers.json index 39580818b..094c0f245 100644 --- a/web-app/src/locales/zh-TW/providers.json +++ b/web-app/src/locales/zh-TW/providers.json @@ -61,6 +61,7 @@ "capabilities": "功能", "tools": "工具", "vision": "視覺", + "proactive": "主動模式(實驗性)", "embeddings": "嵌入", "notAvailable": "尚不可用", "warning": {