Merge branch 'dev' into feat/multiple-scroll-behavior

2025-10-30 01:06:30 +07:00 · 2025-10-30 01:06:30 +07:00 · accd8fbde3
commit accd8fbde3
parent 7ed5ec0cc3 e7b7ac9e94
24 changed files with 892 additions and 11 deletions
--- a/web-app/src/containers/Capabilities.tsx
+++ b/web-app/src/containers/Capabilities.tsx
@ -10,6 +10,7 @@ import {
  IconAtom,
  IconWorld,
  IconCodeCircle2,
+  IconSparkles,
 } from '@tabler/icons-react'
 import { Fragment } from 'react/jsx-runtime'

@ -29,6 +30,8 @@ const Capabilities = ({ capabilities }: CapabilitiesProps) => {
          icon = <IconEye className="size-4" />
        } else if (capability === 'tools') {
          icon = <IconTool className="size-3.5" />
+        } else if (capability === 'proactive') {
+          icon = <IconSparkles className="size-3.5" />
        } else if (capability === 'reasoning') {
          icon = <IconAtom className="size-3.5" />
        } else if (capability === 'embeddings') {
@ -54,7 +57,11 @@ const Capabilities = ({ capabilities }: CapabilitiesProps) => {
                  </TooltipTrigger>
                  <TooltipContent>
                    <p>
-                      {capability === 'web_search' ? 'Web Search' : capability}
+                      {capability === 'web_search'
+                        ? 'Web Search'
+                        : capability === 'proactive'
+                        ? 'Proactive'
+                        : capability}
                    </p>
                  </TooltipContent>
                </Tooltip>
--- a/web-app/src/containers/ModelInfoHoverCard.tsx
+++ b/web-app/src/containers/ModelInfoHoverCard.tsx
@ -152,12 +152,19 @@ export const ModelInfoHoverCard = ({
          </div>

          {/* Features Section */}
-          {(model.num_mmproj > 0 || model.tools) && (
+          {(model.num_mmproj > 0 || model.tools || (model.num_mmproj > 0 && model.tools)) && (
            <div className="border-t border-main-view-fg/10 pt-3">
              <h5 className="text-xs font-medium text-main-view-fg/70 mb-2">
                Features
              </h5>
              <div className="flex flex-wrap gap-2">
+                {model.tools && (
+                  <div className="flex items-center gap-1.5 px-2 py-1 bg-main-view-fg/10 rounded-md">
+                    <span className="text-xs text-main-view-fg font-medium">
+                      Tools
+                    </span>
+                  </div>
+                )}
                {model.num_mmproj > 0 && (
                  <div className="flex items-center gap-1.5 px-2 py-1 bg-main-view-fg/10 rounded-md">
                    <span className="text-xs text-main-view-fg font-medium">
@ -165,10 +172,10 @@ export const ModelInfoHoverCard = ({
                    </span>
                  </div>
                )}
-                {model.tools && (
+                {model.num_mmproj > 0 && model.tools && (
                  <div className="flex items-center gap-1.5 px-2 py-1 bg-main-view-fg/10 rounded-md">
                    <span className="text-xs text-main-view-fg font-medium">
-                      Tools
+                      Proactive
                    </span>
                  </div>
                )}
--- a/web-app/src/containers/tests/Capabilities.test.tsx
+++ b/web-app/src/containers/tests/Capabilities.test.tsx
@ -0,0 +1,124 @@
+import { describe, it, expect, vi } from 'vitest'
+import { render, screen } from '@testing-library/react'
+import Capabilities from '../Capabilities'
+
+// Mock Tooltip components: every wrapper simply renders its children, so the
+// tooltip text is always present in the DOM and can be queried directly.
+vi.mock('@/components/ui/tooltip', () => {
+  const Passthrough = ({ children }: { children: React.ReactNode }) => (
+    <div>{children}</div>
+  )
+  return {
+    Tooltip: Passthrough,
+    TooltipContent: Passthrough,
+    TooltipProvider: Passthrough,
+    TooltipTrigger: Passthrough,
+  }
+})
+
+// Mock Tabler icons. Each stub forwards `className` so tests can assert the
+// sizing class that the component passes to the icon.
+vi.mock('@tabler/icons-react', () => {
+  const stubIcon =
+    (testId: string, label: string) =>
+    ({ className }: { className?: string }) => (
+      <div data-testid={testId} className={className}>
+        {label}
+      </div>
+    )
+  return {
+    IconEye: stubIcon('icon-eye', 'Eye Icon'),
+    IconTool: stubIcon('icon-tool', 'Tool Icon'),
+    IconSparkles: stubIcon('icon-sparkles', 'Sparkles Icon'),
+    IconAtom: stubIcon('icon-atom', 'Atom Icon'),
+    IconWorld: stubIcon('icon-world', 'World Icon'),
+    IconCodeCircle2: stubIcon('icon-code', 'Code Icon'),
+  }
+})
+
+describe('Capabilities', () => {
+  // One row per capability: [capability, icon word used in the title, test id].
+  it.each([
+    ['vision', 'eye', 'icon-eye'],
+    ['tools', 'tool', 'icon-tool'],
+    ['proactive', 'sparkles', 'icon-sparkles'],
+    ['reasoning', 'atom', 'icon-atom'],
+    ['web_search', 'world', 'icon-world'],
+    ['embeddings', 'code', 'icon-code'],
+  ])(
+    'should render %s capability with %s icon',
+    (capability, _iconWord, testId) => {
+      render(<Capabilities capabilities={[capability]} />)
+
+      expect(screen.getByTestId(testId)).toBeInTheDocument()
+    }
+  )
+
+  it('should render multiple capabilities', () => {
+    render(<Capabilities capabilities={['tools', 'vision', 'proactive']} />)
+
+    expect(screen.getByTestId('icon-tool')).toBeInTheDocument()
+    expect(screen.getByTestId('icon-eye')).toBeInTheDocument()
+    expect(screen.getByTestId('icon-sparkles')).toBeInTheDocument()
+  })
+
+  // NOTE(review): only presence is asserted here; the DOM order of the icons
+  // is not verified despite the test title.
+  it('should render all capabilities in correct order', () => {
+    render(<Capabilities capabilities={['tools', 'vision', 'proactive', 'reasoning', 'web_search', 'embeddings']} />)
+
+    const expectedTestIds = [
+      'icon-tool',
+      'icon-eye',
+      'icon-sparkles',
+      'icon-atom',
+      'icon-world',
+      'icon-code',
+    ]
+    for (const testId of expectedTestIds) {
+      expect(screen.getByTestId(testId)).toBeInTheDocument()
+    }
+  })
+
+  it('should handle empty capabilities array', () => {
+    const { container } = render(<Capabilities capabilities={[]} />)
+
+    expect(container.querySelector('[data-testid^="icon-"]')).not.toBeInTheDocument()
+  })
+
+  it('should handle unknown capabilities gracefully', () => {
+    const { container } = render(<Capabilities capabilities={['unknown_capability']} />)
+
+    expect(container).toBeInTheDocument()
+  })
+
+  it('should display proactive tooltip with correct text', () => {
+    render(<Capabilities capabilities={['proactive']} />)
+
+    expect(screen.getByTestId('icon-sparkles')).toBeInTheDocument()
+    // The mocked tooltip renders its content inline, so the label is queryable.
+    expect(screen.getByText('Proactive')).toBeInTheDocument()
+  })
+
+  // NOTE(review): like the ordering test above, this checks presence only.
+  it('should render proactive icon between tools/vision and reasoning', () => {
+    render(<Capabilities capabilities={['tools', 'vision', 'proactive', 'reasoning']} />)
+
+    expect(screen.getByTestId('icon-tool')).toBeInTheDocument()
+    expect(screen.getByTestId('icon-eye')).toBeInTheDocument()
+    expect(screen.getByTestId('icon-sparkles')).toBeInTheDocument()
+    expect(screen.getByTestId('icon-atom')).toBeInTheDocument()
+  })
+
+  it('should apply correct CSS classes to proactive icon', () => {
+    render(<Capabilities capabilities={['proactive']} />)
+
+    const sparklesIcon = screen.getByTestId('icon-sparkles')
+    expect(sparklesIcon).toBeInTheDocument()
+    // Icon should have size-3.5 class (same as tools, reasoning, etc.)
+    expect(sparklesIcon).toHaveClass('size-3.5')
+  })
+})
--- a/web-app/src/containers/tests/ChatInput.test.tsx
+++ b/web-app/src/containers/tests/ChatInput.test.tsx
@ -437,4 +437,31 @@ describe('ChatInput', () => {
      expect(() => renderWithRouter()).not.toThrow()
    })
  })
+
+  describe('Proactive Mode', () => {
+    // NOTE(review): all three cases run the same body — a smoke check that the
+    // chat input renders. None of them configures a proactive-capable model.
+    it.each([
+      'should render ChatInput with proactive capable model',
+      'should handle proactive capability detection',
+      'should work with models that have multiple capabilities',
+    ])('%s', async () => {
+      await act(async () => {
+        renderWithRouter()
+      })
+
+      const chatInput = screen.getByTestId('chat-input')
+      expect(chatInput).toBeInTheDocument()
+    })
+  })
 })
--- a/web-app/src/containers/tests/EditModel.test.tsx
+++ b/web-app/src/containers/tests/EditModel.test.tsx
@ -82,6 +82,7 @@ vi.mock('@tabler/icons-react', () => ({
  IconEye: () => <div data-testid="eye-icon" />,
  IconTool: () => <div data-testid="tool-icon" />,
  IconLoader2: () => <div data-testid="loader-icon" />,
+  IconSparkles: () => <div data-testid="sparkles-icon" />,
 }))

 describe('DialogEditModel - Basic Component Tests', () => {
@ -189,7 +190,7 @@ describe('DialogEditModel - Basic Component Tests', () => {
        {
          id: 'test-model.gguf',
          displayName: 'Test Model',
-          capabilities: ['vision', 'tools'],
+          capabilities: ['vision', 'tools', 'proactive'],
        },
      ],
      settings: [],
@ -226,7 +227,7 @@ describe('DialogEditModel - Basic Component Tests', () => {
        {
          id: 'test-model.gguf',
          displayName: 'Test Model',
-          capabilities: ['vision', 'tools', 'completion', 'embeddings', 'web_search', 'reasoning'],
+          capabilities: ['vision', 'tools', 'proactive', 'completion', 'embeddings', 'web_search', 'reasoning'],
        },
      ],
      settings: [],
@ -240,7 +241,7 @@ describe('DialogEditModel - Basic Component Tests', () => {
    )

    // Component should render without errors even with extra capabilities
-    // The capabilities helper should only extract vision and tools
+    // The capabilities helper should only extract vision, tools, and proactive
    expect(container).toBeInTheDocument()
  })
 })
--- a/web-app/src/containers/dialogs/EditModel.tsx
+++ b/web-app/src/containers/dialogs/EditModel.tsx
@ -17,6 +17,7 @@ import {
  IconTool,
  IconAlertTriangle,
  IconLoader2,
+  IconSparkles,
 } from '@tabler/icons-react'
 import { useState, useEffect } from 'react'
 import { useTranslation } from '@/i18n/react-i18next-compat'
@ -45,6 +46,7 @@ export const DialogEditModel = ({
  const [capabilities, setCapabilities] = useState<Record<string, boolean>>({
    vision: false,
    tools: false,
+    proactive: false,
  })

  // Initialize with the provided model ID or the first model if available
@ -67,6 +69,7 @@ export const DialogEditModel = ({
  const capabilitiesToObject = (capabilitiesList: string[]) => ({
    vision: capabilitiesList.includes('vision'),
    tools: capabilitiesList.includes('tools'),
+    proactive: capabilitiesList.includes('proactive'),
  })

  // Initialize capabilities and display name from selected model
@ -268,6 +271,23 @@ export const DialogEditModel = ({
                disabled={isLoading}
              />
            </div>
+
+            <div className="flex items-center justify-between">
+              <div className="flex items-center space-x-2">
+                <IconSparkles className="size-4 text-main-view-fg/70" />
+                <span className="text-sm">
+                  {t('providers:editModel.proactive')}
+                </span>
+              </div>
+              <Switch
+                id="proactive-capability"
+                checked={capabilities.proactive}
+                onCheckedChange={(checked) =>
+                  handleCapabilityChange('proactive', checked)
+                }
+                disabled={isLoading || !(capabilities.tools && capabilities.vision)}
+              />
+            </div>
          </div>
        </div>

--- a/web-app/src/hooks/tests/useChat.test.ts
+++ b/web-app/src/hooks/tests/useChat.test.ts
@ -170,6 +170,7 @@ vi.mock('@/lib/completion', () => ({
  sendCompletion: vi.fn(),
  postMessageProcessing: vi.fn(),
  isCompletionResponse: vi.fn(),
+  captureProactiveScreenshots: vi.fn(() => Promise.resolve([])),
 }))

 vi.mock('@/lib/messages', () => ({
@ -225,4 +226,26 @@ describe('useChat', () => {

    expect(result.current).toBeDefined()
  })
+
+  describe('Proactive Mode', () => {
+    // Mounts the hook and hands back the value it currently exposes.
+    // NOTE(review): no case below selects a model with the proactive
+    // capability; each only checks what the hook returns.
+    const mountUseChat = () => renderHook(() => useChat()).result.current
+
+    it('should detect proactive mode when model has proactive capability', () => {
+      const hookValue = mountUseChat()
+
+      expect(hookValue).toBeDefined()
+      expect(typeof hookValue).toBe('function')
+    })
+
+    it('should handle model with tools, vision, and proactive capabilities', () => {
+      const hookValue = mountUseChat()
+
+      expect(hookValue).toBeDefined()
+    })
+
+    it('should work with models that have proactive capability', () => {
+      const hookValue = mountUseChat()
+
+      expect(hookValue).toBeDefined()
+      expect(typeof hookValue).toBe('function')
+    })
+  })
 })
--- a/web-app/src/hooks/useChat.ts
+++ b/web-app/src/hooks/useChat.ts
@ -16,6 +16,7 @@ import {
  newUserThreadContent,
  postMessageProcessing,
  sendCompletion,
+  captureProactiveScreenshots,
 } from '@/lib/completion'
 import { CompletionMessagesBuilder } from '@/lib/messages'
 import { renderInstructions } from '@/lib/instructionTemplate'
@ -419,6 +420,27 @@ export const useChat = () => {
            })
          : []

+        // Check if proactive mode is enabled
+        const isProactiveMode = selectedModel?.capabilities?.includes('proactive') ?? false
+
+        // Proactive mode: Capture initial screenshot/snapshot before first LLM call
+        if (isProactiveMode && availableTools.length > 0 && !abortController.signal.aborted) {
+          console.log('Proactive mode: Capturing initial screenshots before LLM call')
+          try {
+            const initialScreenshots = await captureProactiveScreenshots(abortController)
+
+            // Add initial screenshots to builder
+            for (const screenshot of initialScreenshots) {
+              // Generate unique tool call ID for initial screenshot
+              const proactiveToolCallId = `proactive_initial_${Date.now()}_${Math.random()}`
+              builder.addToolMessage(screenshot, proactiveToolCallId)
+              console.log('Initial proactive screenshot added to context')
+            }
+          } catch (e) {
+            console.warn('Failed to capture initial proactive screenshots:', e)
+          }
+        }
+
        let assistantLoopSteps = 0

        while (
@ -694,6 +716,10 @@ export const useChat = () => {
          )

          builder.addAssistantMessage(accumulatedText, undefined, toolCalls)
+
+          // Check if proactive mode is enabled for this model
+          const isProactiveMode = selectedModel?.capabilities?.includes('proactive') ?? false
+
          const updatedMessage = await postMessageProcessing(
            toolCalls,
            builder,
@ -701,7 +727,8 @@ export const useChat = () => {
            abortController,
            useToolApproval.getState().approvedTools,
            allowAllMCPPermissions ? undefined : showApprovalModal,
-            allowAllMCPPermissions
+            allowAllMCPPermissions,
+            isProactiveMode
          )
          addMessage(updatedMessage ?? finalContent)
          updateStreamingContent(emptyThreadContent)
--- a/web-app/src/lib/tests/completion.test.ts
+++ b/web-app/src/lib/tests/completion.test.ts
@ -1,5 +1,5 @@
 import { describe, it, expect, vi, beforeEach } from 'vitest'
-import { 
+import {
  newUserThreadContent,
  newAssistantThreadContent,
  emptyThreadContent,
@ -8,7 +8,8 @@ import {
  stopModel,
  normalizeTools,
  extractToolCall,
-  postMessageProcessing
+  postMessageProcessing,
+  captureProactiveScreenshots
 } from '../completion'

 // Mock dependencies
@ -72,6 +73,54 @@ vi.mock('../extension', () => ({
  ExtensionManager: {},
 }))

+// Default service hub stub. `mcp()` exposes an empty tool list and a
+// cancellable tool call that resolves to a text result; `rag()` exposes an
+// empty tool-name list and a tool call that resolves to a text result.
+// Individual tests override this through `vi.mocked(getServiceHub)`.
+vi.mock('@/hooks/useServiceHub', () => ({
+  getServiceHub: vi.fn(() => ({
+    mcp: vi.fn(() => ({
+      getTools: vi.fn(() => Promise.resolve([])),
+      callToolWithCancellation: vi.fn(() => ({
+        promise: Promise.resolve({
+          content: [{ type: 'text', text: 'mock result' }],
+          error: '',
+        }),
+        cancel: vi.fn(),
+      })),
+    })),
+    rag: vi.fn(() => ({
+      getToolNames: vi.fn(() => Promise.resolve([])),
+      callTool: vi.fn(() => Promise.resolve({
+        content: [{ type: 'text', text: 'mock rag result' }],
+        error: '',
+      })),
+    })),
+  })),
+}))
+
+// Attachments store stub: `getState()` always reports `enabled: true`.
+vi.mock('@/hooks/useAttachments', () => ({
+  useAttachments: {
+    getState: vi.fn(() => ({ enabled: true })),
+  },
+}))
+
+// App state store stub: `getState()` only provides a `setCancelToolCall` spy.
+vi.mock('@/hooks/useAppState', () => ({
+  useAppState: {
+    getState: vi.fn(() => ({
+      setCancelToolCall: vi.fn(),
+    })),
+  },
+}))
+
+// Platform feature flags stub with the ATTACHMENTS flag set to true.
+vi.mock('@/lib/platform/const', () => ({
+  PlatformFeatures: {
+    ATTACHMENTS: true,
+  },
+}))
+
+// Platform feature names stub; only the ATTACHMENTS key is defined.
+vi.mock('@/lib/platform/types', () => ({
+  PlatformFeature: {
+    ATTACHMENTS: 'ATTACHMENTS',
+  },
+}))
+
 describe('completion.ts', () => {
  beforeEach(() => {
    vi.clearAllMocks()
@ -187,4 +236,448 @@ describe('completion.ts', () => {
      expect(result.length).toBe(0)
    })
  })
+
+  describe('Proactive Mode - Browser MCP Tool Detection', () => {
+    // The browser-tool check is private to completion.ts, so every case drives
+    // it indirectly through postMessageProcessing with proactive mode enabled.
+
+    // Builds the successful cancellable tool-call handle used by every case.
+    const okToolCall = () => ({
+      promise: Promise.resolve({ content: [{ type: 'text', text: 'result' }], error: '' }),
+      cancel: vi.fn(),
+    })
+
+    // Routes getServiceHub() to the given MCP stub plus an empty RAG stub.
+    const installServiceHub = async (mcp: Record<string, unknown>) => {
+      const { getServiceHub } = await import('@/hooks/useServiceHub')
+      vi.mocked(getServiceHub).mockReturnValue({
+        mcp: () => mcp,
+        rag: () => ({ getToolNames: () => Promise.resolve([]) })
+      } as any)
+    }
+
+    // Runs postMessageProcessing for a single tool call in proactive mode.
+    const processToolCall = (toolName: string, toolArguments: string) => {
+      const calls = [{
+        id: 'call_1',
+        type: 'function' as const,
+        function: { name: toolName, arguments: toolArguments }
+      }]
+      const builder = {
+        addToolMessage: vi.fn(),
+        getMessages: vi.fn(() => [])
+      } as any
+      const message = { thread_id: 'test-thread', metadata: {} } as any
+
+      return postMessageProcessing(
+        calls,
+        builder,
+        message,
+        new AbortController(),
+        {},
+        undefined,
+        false,
+        true // isProactiveMode = true
+      )
+    }
+
+    it('should detect browser tool names with "browser" prefix', async () => {
+      const mockMcp = {
+        getTools: vi.fn(() => Promise.resolve([])),
+        callToolWithCancellation: vi.fn(okToolCall)
+      }
+      await installServiceHub(mockMcp)
+
+      await processToolCall('browserbase_navigate', '{"url": "test.com"}')
+
+      // Verify tool was executed
+      expect(mockMcp.callToolWithCancellation).toHaveBeenCalled()
+    })
+
+    it('should detect browserbase tools', async () => {
+      const mockCallTool = vi.fn(okToolCall)
+      await installServiceHub({
+        getTools: () => Promise.resolve([]),
+        callToolWithCancellation: mockCallTool
+      })
+
+      await processToolCall('browserbase_screenshot', '{}')
+
+      expect(mockCallTool).toHaveBeenCalled()
+    })
+
+    it('should detect multi_browserbase tools', async () => {
+      const mockCallTool = vi.fn(okToolCall)
+      await installServiceHub({
+        getTools: () => Promise.resolve([]),
+        callToolWithCancellation: mockCallTool
+      })
+
+      await processToolCall('multi_browserbase_stagehand_navigate', '{}')
+
+      expect(mockCallTool).toHaveBeenCalled()
+    })
+
+    it('should not treat non-browser tools as browser tools', async () => {
+      const mockGetTools = vi.fn(() => Promise.resolve([]))
+      await installServiceHub({
+        getTools: mockGetTools,
+        callToolWithCancellation: vi.fn(okToolCall)
+      })
+
+      await processToolCall('fetch_url', '{"url": "test.com"}')
+
+      // Proactive screenshots should not be called for non-browser tools
+      expect(mockGetTools).not.toHaveBeenCalled()
+    })
+  })
+
+  describe('Proactive Mode - Screenshot Capture', () => {
+    // Routes getServiceHub() to an MCP stub built from the given tool lister
+    // and tool-call implementation (no RAG stub is needed for these cases).
+    const installMcp = async (
+      getTools: () => Promise<unknown[]>,
+      callToolWithCancellation: (...args: any[]) => unknown
+    ) => {
+      const { getServiceHub } = await import('@/hooks/useServiceHub')
+      vi.mocked(getServiceHub).mockReturnValue({
+        mcp: () => ({ getTools, callToolWithCancellation })
+      } as any)
+    }
+
+    it('should capture screenshot and snapshot when available', async () => {
+      const mockScreenshotResult = {
+        content: [{ type: 'image', data: 'base64screenshot', mimeType: 'image/png' }],
+        error: '',
+      }
+      const mockSnapshotResult = {
+        content: [{ type: 'text', text: 'snapshot html' }],
+        error: '',
+      }
+
+      const mockGetTools = vi.fn(() => Promise.resolve([
+        { name: 'browserbase_screenshot', inputSchema: {} },
+        { name: 'browserbase_snapshot', inputSchema: {} }
+      ]))
+      const mockCallTool = vi.fn()
+        .mockReturnValueOnce({
+          promise: Promise.resolve(mockScreenshotResult),
+          cancel: vi.fn(),
+        })
+        .mockReturnValueOnce({
+          promise: Promise.resolve(mockSnapshotResult),
+          cancel: vi.fn(),
+        })
+      await installMcp(mockGetTools, mockCallTool)
+
+      const results = await captureProactiveScreenshots(new AbortController())
+
+      expect(results).toHaveLength(2)
+      expect(results[0]).toEqual(mockScreenshotResult)
+      expect(results[1]).toEqual(mockSnapshotResult)
+      expect(mockCallTool).toHaveBeenCalledTimes(2)
+      // The screenshot tool is invoked first, by its discovered name and with
+      // empty arguments.
+      expect(mockCallTool).toHaveBeenNthCalledWith(1, {
+        toolName: 'browserbase_screenshot',
+        arguments: {},
+      })
+    })
+
+    it('should handle missing screenshot tool gracefully', async () => {
+      const mockGetTools = vi.fn(() => Promise.resolve([
+        { name: 'some_other_tool', inputSchema: {} }
+      ]))
+      const mockCallTool = vi.fn()
+      await installMcp(mockGetTools, mockCallTool)
+
+      const results = await captureProactiveScreenshots(new AbortController())
+
+      expect(results).toHaveLength(0)
+      // With no screenshot/snapshot tool listed, nothing should be invoked.
+      expect(mockCallTool).not.toHaveBeenCalled()
+    })
+
+    it('should handle screenshot capture errors gracefully', async () => {
+      const mockGetTools = vi.fn(() => Promise.resolve([
+        { name: 'browserbase_screenshot', inputSchema: {} }
+      ]))
+      const mockCallTool = vi.fn(() => ({
+        promise: Promise.reject(new Error('Screenshot failed')),
+        cancel: vi.fn(),
+      }))
+      await installMcp(mockGetTools, mockCallTool)
+
+      const results = await captureProactiveScreenshots(new AbortController())
+
+      // Should return empty array on error, not throw
+      expect(results).toHaveLength(0)
+      // The failing screenshot call is attempted exactly once.
+      expect(mockCallTool).toHaveBeenCalledTimes(1)
+    })
+
+    it('should respect abort controller', async () => {
+      const mockGetTools = vi.fn(() => Promise.resolve([
+        { name: 'browserbase_screenshot', inputSchema: {} }
+      ]))
+      const mockCallTool = vi.fn(() => ({
+        promise: new Promise((resolve) => setTimeout(() => resolve({
+          content: [{ type: 'image', data: 'base64', mimeType: 'image/png' }],
+          error: '',
+        }), 100)),
+        cancel: vi.fn(),
+      }))
+      await installMcp(mockGetTools, mockCallTool)
+
+      const abortController = new AbortController()
+      abortController.abort()
+
+      const results = await captureProactiveScreenshots(abortController)
+
+      // Should not attempt to capture if already aborted
+      expect(results).toHaveLength(0)
+      expect(mockCallTool).not.toHaveBeenCalled()
+    })
+  })
+
+  describe('Proactive Mode - Screenshot Filtering', () => {
+    // NOTE(review): this case never calls any code from completion.ts. It only
+    // builds a literal fixture and checks the fixture's own length, so it does
+    // not verify that image_url parts are filtered — TODO wire it to the real
+    // filtering logic.
+    it('should filter out old image_url content from tool messages', () => {
+      // Fixture: a user message, a tool message mixing text and an image_url
+      // part, and an assistant reply.
+      const builder = {
+        messages: [
+          { role: 'user', content: 'Hello' },
+          {
+            role: 'tool',
+            content: [
+              { type: 'text', text: 'Tool result' },
+              { type: 'image_url', image_url: { url: 'data:image/png;base64,old' } }
+            ],
+            tool_call_id: 'old_call'
+          },
+          { role: 'assistant', content: 'Response' },
+        ]
+      }
+
+      // Only confirms the fixture above contains three messages.
+      expect(builder.messages).toHaveLength(3)
+    })
+  })
+
+  describe('Proactive Mode - Integration', () => {
+    // Builds a successful cancellable tool-call handle carrying a text result.
+    const textToolCall = (text: string) => ({
+      promise: Promise.resolve({
+        content: [{ type: 'text', text }],
+        error: '',
+      }),
+      cancel: vi.fn(),
+    })
+
+    // Routes getServiceHub() to an MCP stub made of the given spies plus an
+    // empty RAG stub.
+    const installServiceHub = async (
+      getTools: () => Promise<unknown[]>,
+      callToolWithCancellation: (...args: any[]) => unknown
+    ) => {
+      const { getServiceHub } = await import('@/hooks/useServiceHub')
+      vi.mocked(getServiceHub).mockReturnValue({
+        mcp: () => ({ getTools, callToolWithCancellation }),
+        rag: () => ({ getToolNames: () => Promise.resolve([]) })
+      } as any)
+    }
+
+    // Fresh message-builder stub for each case.
+    const createBuilder = () =>
+      ({
+        addToolMessage: vi.fn(),
+        getMessages: vi.fn(() => [])
+      }) as any
+
+    // Runs postMessageProcessing for one tool call with the given mode flag.
+    const processToolCall = (
+      builder: any,
+      toolName: string,
+      toolArguments: string,
+      isProactiveMode: boolean
+    ) =>
+      postMessageProcessing(
+        [{
+          id: 'call_1',
+          type: 'function' as const,
+          function: { name: toolName, arguments: toolArguments }
+        }],
+        builder,
+        { thread_id: 'test-thread', metadata: {} } as any,
+        new AbortController(),
+        {},
+        undefined,
+        false,
+        isProactiveMode
+      )
+
+    it('should trigger proactive screenshots after browser tool execution', async () => {
+      const mockScreenshotResult = {
+        content: [{ type: 'image', data: 'proactive_screenshot', mimeType: 'image/png' }],
+        error: '',
+      }
+
+      const mockGetTools = vi.fn(() => Promise.resolve([
+        { name: 'browserbase_screenshot', inputSchema: {} }
+      ]))
+
+      // First invocation is the browser tool itself; every later invocation is
+      // answered with the proactive screenshot.
+      let browserToolAnswered = false
+      const mockCallTool = vi.fn(() => {
+        if (!browserToolAnswered) {
+          browserToolAnswered = true
+          return textToolCall('navigated to page')
+        }
+        return {
+          promise: Promise.resolve(mockScreenshotResult),
+          cancel: vi.fn(),
+        }
+      })
+
+      await installServiceHub(mockGetTools, mockCallTool)
+      const builder = createBuilder()
+
+      await processToolCall(builder, 'browserbase_navigate', '{"url": "test.com"}', true)
+
+      // Should have called: 1) browser tool, 2) getTools, 3) proactive screenshot
+      expect(mockCallTool).toHaveBeenCalledTimes(2)
+      expect(mockGetTools).toHaveBeenCalled()
+      expect(builder.addToolMessage).toHaveBeenCalledTimes(2)
+    })
+
+    it('should not trigger proactive screenshots when mode is disabled', async () => {
+      const mockGetTools = vi.fn(() => Promise.resolve([
+        { name: 'browserbase_screenshot', inputSchema: {} }
+      ]))
+      const mockCallTool = vi.fn(() => textToolCall('navigated'))
+
+      await installServiceHub(mockGetTools, mockCallTool)
+
+      await processToolCall(createBuilder(), 'browserbase_navigate', '{}', false)
+
+      expect(mockCallTool).toHaveBeenCalledTimes(1)
+      expect(mockGetTools).not.toHaveBeenCalled()
+    })
+
+    it('should not trigger proactive screenshots for non-browser tools', async () => {
+      const mockGetTools = vi.fn(() => Promise.resolve([]))
+      const mockCallTool = vi.fn(() => textToolCall('fetched data'))
+
+      await installServiceHub(mockGetTools, mockCallTool)
+
+      await processToolCall(createBuilder(), 'fetch_url', '{"url": "test.com"}', true)
+
+      expect(mockCallTool).toHaveBeenCalledTimes(1)
+      expect(mockGetTools).not.toHaveBeenCalled()
+    })
+  })
 })
--- a/web-app/src/lib/completion.ts
+++ b/web-app/src/lib/completion.ts
@ -396,6 +396,120 @@ export const extractToolCall = (
  return calls
 }

+/**
+ * Decides whether a tool name looks like it belongs to a browser MCP server.
+ *
+ * The comparison is case-insensitive and anchored at the start of the name,
+ * so `Browser_click` matches while `my_browser` does not.
+ * @param toolName - The name of the tool
+ * @returns true when the lower-cased name starts with a known browser prefix
+ */
+const isBrowserMCPTool = (toolName: string): boolean => {
+  const normalizedName = toolName.toLowerCase()
+  // NOTE: 'browser' already matches everything 'browserbase' and 'browsermcp'
+  // would match; only 'multi_browserbase' adds names beyond it.
+  for (const prefix of [
+    'browser',
+    'browserbase',
+    'browsermcp',
+    'multi_browserbase',
+  ]) {
+    if (normalizedName.startsWith(prefix)) {
+      return true
+    }
+  }
+  return false
+}
+
+/**
+ * Proactively captures the current browser state through MCP tools.
+ *
+ * Lists the available MCP tools, picks the first tool whose name contains
+ * "screenshot" and the first whose name contains "snapshot" (case-insensitive),
+ * and calls each one with empty arguments, screenshot first. Non-string,
+ * truthy results are collected in call order.
+ *
+ * This is best-effort: a failure to list tools or to run a capture is logged
+ * and swallowed, so the promise never rejects and the result may be empty.
+ * If the abort signal fires while a capture is in flight, that call's cancel
+ * handle is invoked; no further capture is started once the signal is aborted.
+ * @param abortController - The abort controller for cancellation
+ * @returns Promise with the collected screenshot and snapshot results
+ */
+export const captureProactiveScreenshots = async (
+  abortController: AbortController
+): Promise<ToolResult[]> => {
+  const results: ToolResult[] = []
+  const { signal } = abortController
+
+  try {
+    // Get available tools
+    const allTools = await getServiceHub().mcp().getTools()
+
+    // Each keyword selects one capture tool by substring match on its name.
+    for (const target of ['screenshot', 'snapshot'] as const) {
+      if (signal.aborted) break
+
+      const tool = allTools.find((t) => t.name.toLowerCase().includes(target))
+      if (!tool) continue
+
+      try {
+        const { promise, cancel } = getServiceHub()
+          .mcp()
+          .callToolWithCancellation({
+            toolName: tool.name,
+            arguments: {},
+          })
+
+        // Forward an abort to the in-flight tool call; without this the call
+        // keeps running after the user has cancelled the generation.
+        const cancelOnAbort = () => {
+          if (typeof cancel !== 'function') return
+          void Promise.resolve()
+            .then(() => cancel())
+            .catch((e: unknown) =>
+              console.warn(`Failed to cancel proactive ${target}:`, e)
+            )
+        }
+        signal.addEventListener('abort', cancelOnAbort, { once: true })
+
+        try {
+          const captured = await promise
+          if (captured && typeof captured !== 'string') {
+            results.push(captured as ToolResult)
+          }
+        } finally {
+          // Never leave a listener behind on a controller that outlives this call.
+          signal.removeEventListener('abort', cancelOnAbort)
+        }
+      } catch (e) {
+        console.warn(`Failed to capture proactive ${target}:`, e)
+      }
+    }
+  } catch (e) {
+    console.error('Failed to get MCP tools for proactive capture:', e)
+  }
+
+  return results
+}
+
+/**
+ * Strips image parts from the tool messages currently held by the builder.
+ *
+ * For every message with role 'tool' and array (multimodal) content, parts of
+ * type 'image_url' are removed; a tool message left with no parts is dropped
+ * entirely. Tool messages with non-array content and all non-tool messages
+ * are kept untouched. The builder's message list is then replaced in place.
+ * @param builder - The completion messages builder
+ */
+const filterOldProactiveScreenshots = (builder: CompletionMessagesBuilder) => {
+  const retained: any[] = []
+
+  for (const entry of builder.getMessages()) {
+    // Anything that is not a multimodal tool message passes through as-is.
+    if (entry.role !== 'tool' || !Array.isArray(entry.content)) {
+      retained.push(entry)
+      continue
+    }
+
+    const nonImageParts = entry.content.filter(
+      (part: any) => part.type !== 'image_url'
+    )
+    // An image-only tool message has nothing left to send, so it is omitted.
+    if (nonImageParts.length > 0) {
+      retained.push({ ...entry, content: nonImageParts })
+    }
+  }
+
+  // Workaround: CompletionMessagesBuilder exposes no setter, so the private
+  // messages array is overwritten through an untyped view of the builder.
+  const untypedBuilder = builder as any
+  untypedBuilder.messages = retained
+}
+
 /**
 * @fileoverview Helper function to process the completion response.
 * @param calls
@ -405,6 +519,7 @@ export const extractToolCall = (
 * @param approvedTools
 * @param showModal
 * @param allowAllMCPPermissions
+ * @param isProactiveMode
 */
 export const postMessageProcessing = async (
  calls: ChatCompletionMessageToolCall[],
@ -417,7 +532,8 @@ export const postMessageProcessing = async (
    threadId: string,
    toolParameters?: object
  ) => Promise<boolean>,
-  allowAllMCPPermissions: boolean = false
+  allowAllMCPPermissions: boolean = false,
+  isProactiveMode: boolean = false
 ) => {
  // Handle completed tool calls
  if (calls.length) {
@ -473,6 +589,7 @@ export const postMessageProcessing = async (
      const toolName = toolCall.function.name
      const toolArgs = toolCall.function.arguments.length ? toolParameters : {}
      const isRagTool = ragToolNames.has(toolName)
+      const isBrowserTool = isBrowserMCPTool(toolName)

      // Auto-approve RAG tools (local/safe operations), require permission for MCP tools
      const approved = isRagTool
@ -562,6 +679,27 @@ export const postMessageProcessing = async (
        ],
      }
      builder.addToolMessage(result as ToolResult, toolCall.id)
+
+      // Proactive mode: Capture screenshot/snapshot after browser tool execution
+      if (isProactiveMode && isBrowserTool && !abortController.signal.aborted) {
+        console.log('Proactive mode: Capturing screenshots after browser tool call')
+
+        // Filter out old screenshots before adding new ones
+        filterOldProactiveScreenshots(builder)
+
+        // Capture new screenshots
+        const proactiveScreenshots = await captureProactiveScreenshots(abortController)
+
+        // Add proactive screenshots to builder
+        for (const screenshot of proactiveScreenshots) {
+          // Generate a unique tool call ID for the proactive screenshot
+          const proactiveToolCallId = ulid()
+          builder.addToolMessage(screenshot, proactiveToolCallId)
+
+          console.log('Proactive screenshot captured and added to context')
+        }
+      }
+
      // update message metadata
    }
    return message
--- a/web-app/src/locales/de-DE/common.json
+++ b/web-app/src/locales/de-DE/common.json
@ -80,6 +80,7 @@
  "tools": "Werkzeuge",
  "webSearch": "Web Suche",
  "reasoning": "Argumentation",
+  "proactive": "Proaktiv",
  "selectAModel": "Wähle ein Modell",
  "noToolsAvailable": "Keine Werkzeuge verfügbar",
  "noModelsFoundFor": "Keine Modelle gefunden zu \"{{searchValue}}\"",
--- a/web-app/src/locales/de-DE/providers.json
+++ b/web-app/src/locales/de-DE/providers.json
@ -61,6 +61,7 @@
    "capabilities": "Fähigkeiten",
    "tools": "Werkzeuge",
    "vision": "Vision",
+    "proactive": "Proaktiv (Experimentell)",
    "embeddings": "Einbettungen",
    "notAvailable": "Noch nicht verfügbar",
    "warning": {
--- a/web-app/src/locales/en/common.json
+++ b/web-app/src/locales/en/common.json
@ -81,6 +81,7 @@
  "tools": "Tools",
  "webSearch": "Web Search",
  "reasoning": "Reasoning",
+  "proactive": "Proactive",
  "selectAModel": "Select a model",
  "noToolsAvailable": "No tools available",
  "noModelsFoundFor": "No models found for \"{{searchValue}}\"",
--- a/web-app/src/locales/en/providers.json
+++ b/web-app/src/locales/en/providers.json
@ -61,6 +61,7 @@
    "capabilities": "Capabilities",
    "tools": "Tools",
    "vision": "Vision",
+    "proactive": "Proactive (Experimental)",
    "embeddings": "Embeddings",
    "notAvailable": "Not available yet",
    "warning": {
--- a/web-app/src/locales/id/common.json
+++ b/web-app/src/locales/id/common.json
@ -80,6 +80,7 @@
  "tools": "Alat",
  "webSearch": "Pencarian Web",
  "reasoning": "Penalaran",
+  "proactive": "Proaktif",
  "selectAModel": "Pilih model",
  "noToolsAvailable": "Tidak ada alat yang tersedia",
  "noModelsFoundFor": "Tidak ada model yang ditemukan untuk \"{{searchValue}}\"",
--- a/web-app/src/locales/id/providers.json
+++ b/web-app/src/locales/id/providers.json
@ -61,6 +61,7 @@
    "capabilities": "Kemampuan",
    "tools": "Alat",
    "vision": "Visi",
+    "proactive": "Proaktif (Eksperimental)",
    "embeddings": "Embedding",
    "notAvailable": "Belum tersedia",
    "warning": {
--- a/web-app/src/locales/pl/common.json
+++ b/web-app/src/locales/pl/common.json
@ -80,6 +80,7 @@
  "tools": "Narzędzia",
  "webSearch": "Szukanie w Sieci",
  "reasoning": "Rozumowanie",
+  "proactive": "Proaktywny",
  "selectAModel": "Wybierz Model",
  "noToolsAvailable": "Brak narzędzi",
  "noModelsFoundFor": "Brak modeli dla \"{{searchValue}}\"",
--- a/web-app/src/locales/pl/providers.json
+++ b/web-app/src/locales/pl/providers.json
@ -61,6 +61,7 @@
    "capabilities": "Możliwości",
    "tools": "Narzędzia",
    "vision": "Wizja",
+    "proactive": "Proaktywny (Eksperymentalny)",
    "embeddings": "Osadzenia",
    "notAvailable": "Jeszcze niedostępne",
    "warning": {
--- a/web-app/src/locales/vn/common.json
+++ b/web-app/src/locales/vn/common.json
@ -80,6 +80,7 @@
  "tools": "Công cụ",
  "webSearch": "Tìm kiếm trên web",
  "reasoning": "Lý luận",
+  "proactive": "Chủ động",
  "selectAModel": "Chọn một mô hình",
  "noToolsAvailable": "Không có công cụ nào",
  "noModelsFoundFor": "Không tìm thấy mô hình nào cho \"{{searchValue}}\"",
--- a/web-app/src/locales/vn/providers.json
+++ b/web-app/src/locales/vn/providers.json
@ -61,6 +61,7 @@
    "capabilities": "Khả năng",
    "tools": "Công cụ",
    "vision": "Thị giác",
+    "proactive": "Chủ động (Thử nghiệm)",
    "embeddings": "Nhúng",
    "notAvailable": "Chưa có",
    "warning": {
--- a/web-app/src/locales/zh-CN/common.json
+++ b/web-app/src/locales/zh-CN/common.json
@ -80,6 +80,7 @@
  "tools": "工具",
  "webSearch": "网页搜索",
  "reasoning": "推理",
+  "proactive": "主动模式",
  "selectAModel": "选择一个模型",
  "noToolsAvailable": "无可用工具",
  "noModelsFoundFor": "未找到“{{searchValue}}”的模型",
--- a/web-app/src/locales/zh-CN/providers.json
+++ b/web-app/src/locales/zh-CN/providers.json
@ -61,6 +61,7 @@
    "capabilities": "功能",
    "tools": "工具",
    "vision": "视觉",
+    "proactive": "主动模式（实验性）",
    "embeddings": "嵌入",
    "notAvailable": "尚不可用",
    "warning": {
--- a/web-app/src/locales/zh-TW/common.json
+++ b/web-app/src/locales/zh-TW/common.json
@ -80,6 +80,7 @@
  "tools": "工具",
  "webSearch": "網路搜尋",
  "reasoning": "推理",
+  "proactive": "主動模式",
  "selectAModel": "選擇一個模型",
  "noToolsAvailable": "沒有可用的工具",
  "noModelsFoundFor": "找不到符合「{{searchValue}}」的模型",
--- a/web-app/src/locales/zh-TW/providers.json
+++ b/web-app/src/locales/zh-TW/providers.json
@ -61,6 +61,7 @@
    "capabilities": "功能",
    "tools": "工具",
    "vision": "視覺",
+    "proactive": "主動模式（實驗性）",
    "embeddings": "嵌入",
    "notAvailable": "尚不可用",
    "warning": {