added streaming

commit: ff247c1bcd
parent: ae8c6a4f04
.cursorindexingignore (new file, 3 lines)

# Don't index SpecStory auto-save files, but allow explicit context inclusion via @ references
.specstory/**
.specstory/.gitignore (new file, vendored, 4 lines)

# SpecStory project identity file
/.project.json
# SpecStory explanation file
/.what-is-this.md
README.md

@@ -14,10 +14,11 @@ A terminal application that enables two LLMs to engage in structured debates on
 - **Beautiful UI**: Rich terminal interface with side-by-side display, color-coded positions, and formatted output

 ### Advanced Features
+- **Streaming Responses**: Real-time streaming of LLM responses with live side-by-side display and tokens/second metrics
 - **Automatic Memory Management**: Token counting and automatic memory truncation to prevent context overflow
 - **Auto-Save**: Debates automatically saved after each round (configurable)
 - **Response Validation**: Ensures agents provide valid, non-empty responses
-- **Statistics Tracking**: Real-time tracking of response times, token usage, and memory consumption
+- **Statistics Tracking**: Real-time tracking of response times, token usage, memory consumption, and streaming speeds
 - **Comprehensive Logging**: Optional file and console logging with configurable levels
 - **CLI Arguments**: Control all aspects via command-line flags
 - **Environment Variables**: Secure API key management via `.env` files
@@ -138,6 +139,7 @@ python -m src.main [OPTIONS]
 - `--topic, -t TEXT` - Debate topic (skips interactive prompt)
 - `--exchanges, -e NUMBER` - Exchanges per round (default: 10)
 - `--no-auto-save` - Disable automatic saving after each round
+- `--no-streaming` - Disable streaming responses (show complete responses at once instead of real-time streaming)
 - `--log-level LEVEL` - Logging level: DEBUG, INFO, WARNING, ERROR, CRITICAL
 - `--log-file PATH` - Log to file (default: console only)
 - `--max-memory-tokens NUMBER` - Maximum tokens to keep in agent memory
@@ -157,6 +159,9 @@ python -m src.main --log-level DEBUG --log-file debug.log
 # Disable auto-save for manual control
 python -m src.main --no-auto-save

+# Disable streaming for slower connections
+python -m src.main --no-streaming
+
 # Use custom config and memory limit
 python -m src.main --config my_config.yaml --max-memory-tokens 50000
debates/debate_pee_is_stored_in_the_balls_20251111_195431.json (new file, 157 lines)
File diff suppressed because one or more lines are too long
src/agent.py (29 lines changed)

@@ -123,6 +123,35 @@ class DebateAgent:

         return response

+    def generate_response_stream(self, **kwargs):
+        """
+        Generate a streaming response based on current memory.
+
+        Yields chunks as they arrive and accumulates them into memory
+        after streaming completes.
+
+        Args:
+            **kwargs: Additional parameters for the LLM provider
+
+        Yields:
+            str: Response chunks as they arrive
+
+        Returns:
+            str: The complete accumulated response
+        """
+        accumulated = []
+
+        # Stream chunks from provider
+        for chunk in self.provider.generate_response_stream(self.memory, **kwargs):
+            accumulated.append(chunk)
+            yield chunk
+
+        # After streaming completes, add full response to memory
+        full_response = ''.join(accumulated)
+        self.memory.append({"role": "assistant", "content": full_response})
+
+        return full_response
+
     def get_memory(self) -> List[Dict[str, str]]:
         """
         Get the agent's conversation memory.
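Usage note: because `generate_response_stream` is a generator that also appends the accumulated text to the agent's memory, a caller only has to iterate it. The sketch below is hypothetical caller code (not part of this commit) that mirrors how the test suite drives the agent with a mocked provider, then reads the full reply back via `get_memory`.

# Hypothetical caller sketch, not part of this commit: consume the stream,
# then read the accumulated reply back from the agent's memory.
from unittest.mock import Mock
from src.agent import DebateAgent

fake_provider = Mock()
fake_provider.generate_response_stream.return_value = iter(["Streaming ", "works."])

agent = DebateAgent(name="Demo", provider=fake_provider, system_prompt="Demo prompt")

for chunk in agent.generate_response_stream():
    print(chunk, end="", flush=True)   # render chunks as they arrive
print()

full_reply = agent.get_memory()[-1]["content"]   # "Streaming works."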
src/constants.py

@@ -50,3 +50,8 @@ DEFAULT_PRESENCE_PENALTY = 0.0

 # Token Estimation (approximate tokens per character for different languages)
 TOKENS_PER_CHAR_ENGLISH = 0.25  # Rough estimate for English text
+
+# Streaming Configuration
+STREAMING_ENABLED_DEFAULT = True  # Whether streaming is enabled by default
+STREAMING_REFRESH_RATE = 10  # UI updates per second during streaming
+STREAMING_MIN_TERMINAL_WIDTH = 100  # Minimum terminal width for side-by-side streaming
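Note: `STREAMING_REFRESH_RATE` is consumed by the Rich `Live` display in `src/ui.py`, but the call site for `STREAMING_MIN_TERMINAL_WIDTH` is not part of the hunks in this commit. A plausible use (an assumption, not code from this change set) would be gating the side-by-side layout on the current console width:

# Sketch only (assumed usage, not shown in this commit): fall back to a
# stacked layout when the terminal is too narrow for side-by-side streaming.
from rich.console import Console
from src.constants import STREAMING_MIN_TERMINAL_WIDTH

console = Console()
side_by_side = console.width >= STREAMING_MIN_TERMINAL_WIDTH  # 100 columns by default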
src/debate.py (162 lines changed)

@@ -194,6 +194,160 @@ class DebateOrchestrator:

         return response_for, response_against

+    def conduct_exchange_stream(
+        self, agent_for: DebateAgent, agent_against: DebateAgent
+    ) -> Tuple[str, str]:
+        """
+        Conduct one exchange with streaming responses (both agents respond once).
+
+        Args:
+            agent_for: Agent arguing 'for'
+            agent_against: Agent arguing 'against'
+
+        Returns:
+            Tuple of (response_for, response_against)
+
+        Raises:
+            ProviderResponseError: If response validation fails
+        """
+        logger.info(f"Starting streaming exchange {self.current_exchange + 1}")
+
+        # Build prompts (same as non-streaming)
+        if self.current_exchange == 0:
+            prompt_for = f"Present your opening argument for the position that {self.topic}."
+        else:
+            prompt_for = self._build_context_prompt(agent_for)
+
+        agent_for.add_message("user", prompt_for)
+
+        # Prepare prompt for AGAINST agent (will use after FOR finishes)
+        if self.current_exchange == 0:
+            # Will be updated with FOR's response after streaming
+            prompt_against_template = "against_first_exchange"
+        else:
+            prompt_against = self._build_context_prompt(agent_against)
+            agent_against.add_message("user", prompt_against)
+
+        # Get streaming generators
+        stream_for = agent_for.generate_response_stream()
+
+        # We need to consume stream_for first to get the complete response
+        # before we can build the prompt for agent_against in the first exchange
+        if self.current_exchange == 0:
+            # For first exchange, we need FOR's complete response before AGAINST can start
+            # So we'll handle this specially in the UI function
+            pass
+
+        # Create generator for AGAINST (will be consumed after FOR completes)
+        def get_stream_against():
+            """Generator that yields chunks from AGAINST agent."""
+            # For first exchange, build prompt with FOR's response
+            if self.current_exchange == 0:
+                # The response_for will be available after stream_for is consumed
+                # We'll handle this in the UI layer by passing a callback
+                pass
+
+            # Generate streaming response
+            for chunk in agent_against.generate_response_stream():
+                yield chunk
+
+        # Use streaming UI to display both responses
+        # It will consume FOR first, then AGAINST
+        from . import ui
+
+        # Track timing
+        start_time = time.time()
+
+        # Handle first exchange specially (AGAINST needs FOR's response)
+        if self.current_exchange == 0:
+            # Manually consume FOR stream first
+            response_for_chunks = []
+            for chunk in stream_for:
+                response_for_chunks.append(chunk)
+            response_for = ''.join(response_for_chunks)
+
+            # Validate FOR response
+            response_for = self._validate_response(response_for, agent_for.name)
+
+            # Record in debate history
+            exchange_data_for = {
+                "exchange": self.current_exchange + 1,
+                "agent": agent_for.name,
+                "position": "for",
+                "content": response_for,
+            }
+            self.debate_history.append(exchange_data_for)
+
+            # Now build AGAINST prompt with FOR's response
+            prompt_against = (
+                f"Your opponent's opening argument: {response_for}\n\n"
+                f"Present your opening counter-argument against the position that {self.topic}."
+            )
+            agent_against.add_message("user", prompt_against)
+
+            # Get AGAINST stream
+            stream_against = agent_against.generate_response_stream()
+
+            # Display with UI (FOR already complete, just show it while AGAINST streams)
+            def for_replay():
+                """Generator that just yields the complete FOR response."""
+                yield response_for
+
+            response_for_display, response_against, _, tokens_per_sec_against = ui.stream_exchange_pair(
+                exchange_num=self.current_exchange + 1,
+                agent_for_name=agent_for.name,
+                agent_for_stream=for_replay(),
+                agent_against_name=agent_against.name,
+                agent_against_stream=stream_against,
+                total_exchanges=self.exchanges_per_round,
+            )
+        else:
+            # Normal case: stream both
+            stream_against = agent_against.generate_response_stream()
+
+            response_for, response_against, tokens_per_sec_for, tokens_per_sec_against = ui.stream_exchange_pair(
+                exchange_num=self.current_exchange + 1,
+                agent_for_name=agent_for.name,
+                agent_for_stream=stream_for,
+                agent_against_name=agent_against.name,
+                agent_against_stream=stream_against,
+                total_exchanges=self.exchanges_per_round,
+            )
+
+            # Validate FOR response
+            response_for = self._validate_response(response_for, agent_for.name)
+
+            # Record FOR in debate history
+            exchange_data_for = {
+                "exchange": self.current_exchange + 1,
+                "agent": agent_for.name,
+                "position": "for",
+                "content": response_for,
+            }
+            self.debate_history.append(exchange_data_for)
+
+        # Track timing
+        response_time = time.time() - start_time
+        self.response_times.append(response_time)
+        self.total_response_time += response_time
+
+        # Validate AGAINST response
+        response_against = self._validate_response(response_against, agent_against.name)
+
+        # Record AGAINST in debate history
+        exchange_data_against = {
+            "exchange": self.current_exchange + 1,
+            "agent": agent_against.name,
+            "position": "against",
+            "content": response_against,
+        }
+        self.debate_history.append(exchange_data_against)
+
+        self.current_exchange += 1
+        logger.info(f"Streaming exchange {self.current_exchange} completed")
+
+        return response_for, response_against
+
     def _build_context_prompt(self, agent: DebateAgent) -> str:
         """
         Build a context-aware prompt that includes recent debate history.

@@ -255,7 +409,7 @@ class DebateOrchestrator:
         return response

     def run_round(
-        self, agent_for: DebateAgent, agent_against: DebateAgent
+        self, agent_for: DebateAgent, agent_against: DebateAgent, streaming: bool = True
     ) -> List[Dict[str, str]]:
         """
         Run a full round of exchanges.

@@ -263,6 +417,7 @@ class DebateOrchestrator:
         Args:
             agent_for: Agent arguing 'for'
             agent_against: Agent arguing 'against'
+            streaming: Whether to use streaming responses (default: True)

         Returns:
             List of exchanges from this round

@@ -271,7 +426,10 @@ class DebateOrchestrator:
         exchanges_to_run = self.exchanges_per_round

         for _ in range(exchanges_to_run):
-            self.conduct_exchange(agent_for, agent_against)
+            if streaming:
+                self.conduct_exchange_stream(agent_for, agent_against)
+            else:
+                self.conduct_exchange(agent_for, agent_against)

         return self.debate_history[round_start:]
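Design note: the first exchange is special-cased because the AGAINST prompt embeds FOR's complete opening argument, so FOR's stream has to be fully drained before AGAINST can even be prompted. A toy sketch of that ordering constraint (illustrative only, not project code):

# Toy illustration of the ordering constraint, not project code.
def fake_stream(text):
    for word in text.split(" "):
        yield word + " "

# Drain FOR completely first...
opening_for = "".join(fake_stream("Opening argument for the topic"))

# ...only then can the AGAINST prompt embed the opponent's full opening.
prompt_against = (
    f"Your opponent's opening argument: {opening_for}\n\n"
    "Present your opening counter-argument."
)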
src/main.py (14 lines changed)

@@ -244,7 +244,7 @@ def setup_configuration(config_path: str = DEFAULT_CONFIG_FILE) -> Config:
     return config


-def run_debate_loop(orchestrator: DebateOrchestrator, agent_for, agent_against, auto_save: bool = True):
+def run_debate_loop(orchestrator: DebateOrchestrator, agent_for, agent_against, auto_save: bool = True, streaming: bool = True):
     """
     Run the debate loop with user interaction.

@@ -253,6 +253,7 @@ def run_debate_loop(orchestrator: DebateOrchestrator, agent_for, agent_against,
         agent_for: Agent arguing 'for'
         agent_against: Agent arguing 'against'
         auto_save: Whether to auto-save after each round
+        streaming: Whether to use streaming responses (default: True)

     Raises:
         DebateError: If debate encounters an error

@@ -265,7 +266,7 @@ def run_debate_loop(orchestrator: DebateOrchestrator, agent_for, agent_against,

     try:
         # Run the round (exchanges are displayed as they happen)
-        orchestrator.run_round(agent_for, agent_against)
+        orchestrator.run_round(agent_for, agent_against, streaming=streaming)

         # Auto-save after each round if enabled
         if auto_save:

@@ -412,6 +413,12 @@ Examples:
         help="Maximum tokens to keep in agent memory"
     )

+    parser.add_argument(
+        "--no-streaming",
+        action="store_true",
+        help="Disable streaming responses (show complete responses at once)"
+    )
+
     return parser.parse_args()


@@ -485,7 +492,8 @@ def main():

         # Run the debate loop
         auto_save = not args.no_auto_save
-        run_debate_loop(orchestrator, agent_for, agent_against, auto_save=auto_save)
+        streaming = not args.no_streaming  # Streaming enabled by default
+        run_debate_loop(orchestrator, agent_for, agent_against, auto_save=auto_save, streaming=streaming)

     except Exception as e:
         ui.print_error(f"Error during debate: {str(e)}")
src/providers/lmstudio.py

@@ -1,5 +1,6 @@
 """LM Studio LLM provider implementation."""

+import json
 import time
 from typing import List, Dict, Any
 import requests

@@ -89,6 +90,105 @@ class LMStudioProvider(BaseLLMProvider):
             logger.error(f"Unexpected error in LMStudio generate_response: {e}")
             raise ProviderError(f"Unexpected error: {str(e)}") from e

+    def generate_response_stream(
+        self, messages: List[Dict[str, str]], **kwargs
+    ):
+        """
+        Generate a streaming response using LM Studio local API.
+
+        Yields chunks of the response as they arrive via Server-Sent Events (SSE).
+
+        Args:
+            messages: List of message dictionaries with 'role' and 'content'
+            **kwargs: Additional parameters (temperature, max_tokens, etc.)
+
+        Yields:
+            str: Response chunks as they arrive
+
+        Raises:
+            ProviderError: If the API call fails
+            ProviderTimeoutError: If request times out
+            ProviderConnectionError: If connection fails
+        """
+        logger.debug(f"Generating streaming response with LMStudio model: {self.model}")
+
+        try:
+            # Prepare the request payload
+            payload = {
+                "model": self.model,
+                "messages": messages,
+                "temperature": kwargs.get("temperature", DEFAULT_TEMPERATURE),
+                "max_tokens": kwargs.get("max_tokens", DEFAULT_MAX_TOKENS_PER_RESPONSE),
+                "stream": True,  # Enable streaming
+            }
+
+            # Add any additional kwargs
+            for key, value in kwargs.items():
+                if key not in ["model", "messages", "stream"]:
+                    payload[key] = value
+
+            logger.debug(f"Stream API params: model={payload['model']}, temp={payload['temperature']}, "
+                         f"max_tokens={payload['max_tokens']}")
+
+            # Make the streaming API request
+            response = requests.post(
+                f"{self.base_url}/chat/completions",
+                json=payload,
+                headers={"Content-Type": "application/json"},
+                timeout=API_TIMEOUT_SECONDS,
+                stream=True,  # Enable streaming mode
+            )
+
+            response.raise_for_status()
+
+            # Parse Server-Sent Events stream
+            for line in response.iter_lines():
+                if line:
+                    line = line.decode('utf-8')
+
+                    # Skip comments and empty lines
+                    if line.startswith(':') or not line.strip():
+                        continue
+
+                    # Parse data lines
+                    if line.startswith('data: '):
+                        data_str = line[6:]  # Remove 'data: ' prefix
+
+                        # Check for stream end
+                        if data_str.strip() == '[DONE]':
+                            break
+
+                        try:
+                            data = json.loads(data_str)
+                            if 'choices' in data and len(data['choices']) > 0:
+                                delta = data['choices'][0].get('delta', {}).get('content')
+                                if delta:
+                                    yield delta
+                        except json.JSONDecodeError:
+                            logger.warning(f"Failed to parse SSE chunk: {data_str}")
+                            continue
+
+        except requests.exceptions.ConnectionError as e:
+            logger.warning(f"Cannot connect to LMStudio during streaming: {e}")
+            raise ProviderConnectionError(
+                f"Cannot connect to LM Studio at {self.base_url}. "
+                "Make sure LM Studio is running and the server is started."
+            ) from e
+
+        except requests.exceptions.Timeout as e:
+            logger.warning(f"LMStudio request timed out during streaming: {e}")
+            raise ProviderTimeoutError(
+                f"LM Studio request timed out after {API_TIMEOUT_SECONDS}s"
+            ) from e
+
+        except requests.exceptions.HTTPError as e:
+            logger.error(f"LMStudio HTTP error during streaming: {e}")
+            raise ProviderError(f"LM Studio HTTP error: {str(e)}") from e
+
+        except Exception as e:
+            logger.error(f"Unexpected error in streaming: {e}")
+            raise ProviderError(f"Unexpected error: {str(e)}") from e
+
     def _call_api(self, messages: List[Dict[str, str]], **kwargs) -> str:
         """
         Make the actual API call to LM Studio.
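Format note: the SSE parser above expects OpenAI-compatible `data:` lines from LM Studio's `/chat/completions` endpoint. A minimal, self-contained sketch of that wire format and how a single line is decoded follows; the JSON payload shown is made up for illustration, not captured from a real server.

# Illustrative sample of the SSE lines the parser above expects; the payloads
# here are invented, not recorded from a real LM Studio response.
import json

sample_lines = [
    'data: {"choices": [{"delta": {"content": "Hel"}}]}',
    'data: {"choices": [{"delta": {"content": "lo"}}]}',
    'data: [DONE]',
]

for line in sample_lines:
    data_str = line[6:]                      # strip the 'data: ' prefix
    if data_str.strip() == '[DONE]':
        break                                # end-of-stream sentinel
    delta = json.loads(data_str)['choices'][0].get('delta', {}).get('content')
    if delta:
        print(delta, end="")                 # prints "Hello"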
src/providers/openrouter.py

@@ -96,6 +96,82 @@ class OpenRouterProvider(BaseLLMProvider):
             logger.error(f"Unexpected error in OpenRouter generate_response: {e}")
             raise ProviderError(f"Unexpected error: {str(e)}") from e

+    def generate_response_stream(
+        self, messages: List[Dict[str, str]], **kwargs
+    ):
+        """
+        Generate a streaming response using OpenRouter API.
+
+        Yields chunks of the response as they arrive from the API.
+
+        Args:
+            messages: List of message dictionaries with 'role' and 'content'
+            **kwargs: Additional parameters (temperature, max_tokens, etc.)
+
+        Yields:
+            str: Response chunks as they arrive
+
+        Raises:
+            ProviderError: If the API call fails
+            ProviderRateLimitError: If rate limit is exceeded
+            ProviderTimeoutError: If request times out
+            ProviderConnectionError: If connection fails
+        """
+        logger.debug(f"Generating streaming response with OpenRouter model: {self.model}")
+
+        try:
+            # Set up params for streaming
+            params = {
+                "model": self.model,
+                "messages": messages,
+                "temperature": kwargs.get("temperature", DEFAULT_TEMPERATURE),
+                "max_tokens": kwargs.get("max_tokens", DEFAULT_MAX_TOKENS_PER_RESPONSE),
+                "stream": True,  # Enable streaming
+            }
+
+            # Add any additional kwargs
+            for key, value in kwargs.items():
+                if key not in ["model", "messages", "stream"]:
+                    params[key] = value
+
+            logger.debug(f"Stream API params: model={params['model']}, temp={params['temperature']}, "
+                         f"max_tokens={params['max_tokens']}")
+
+            # Create streaming request
+            stream = self.client.chat.completions.create(**params)
+
+            # Yield chunks as they arrive
+            for chunk in stream:
+                if chunk.choices and len(chunk.choices) > 0:
+                    delta = chunk.choices[0].delta.content
+                    if delta is not None:
+                        yield delta
+
+        except RateLimitError as e:
+            logger.warning(f"Rate limit exceeded during streaming: {e}")
+            raise ProviderRateLimitError(f"Rate limit exceeded: {str(e)}") from e
+
+        except APITimeoutError as e:
+            logger.warning(f"Request timed out during streaming: {e}")
+            raise ProviderTimeoutError(f"Request timed out after {API_TIMEOUT_SECONDS}s") from e
+
+        except APIConnectionError as e:
+            logger.warning(f"Connection error during streaming: {e}")
+            raise ProviderConnectionError(f"Failed to connect to OpenRouter: {str(e)}") from e
+
+        except APIError as e:
+            # Check for authentication errors
+            if "401" in str(e) or "unauthorized" in str(e).lower():
+                logger.error(f"Authentication failed during streaming: {e}")
+                raise ProviderAuthenticationError("Invalid API key or authentication failed") from e
+
+            logger.error(f"API error during streaming: {e}")
+            raise ProviderError(f"OpenRouter API error: {str(e)}") from e
+
+        except Exception as e:
+            logger.error(f"Unexpected error in streaming: {e}")
+            raise ProviderError(f"Unexpected error: {str(e)}") from e
+
     def _call_api(self, messages: List[Dict[str, str]], **kwargs) -> str:
         """
         Make the actual API call to OpenRouter.
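Both providers now expose the same `generate_response_stream(messages, **kwargs)` generator alongside the existing blocking `generate_response`, which is what lets the `--no-streaming` flag switch paths without touching provider code. A rough consumer sketch (hypothetical wiring, not code from this commit):

# Hypothetical consumer sketch, not part of this commit: stream when enabled,
# fall back to the existing blocking call otherwise.
def get_reply(provider, messages, use_streaming=True):
    if not use_streaming:
        return provider.generate_response(messages)
    parts = []
    for chunk in provider.generate_response_stream(messages):
        parts.append(chunk)          # a UI layer could also render chunk here
    return "".join(parts)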
src/ui.py (129 lines changed)

@@ -5,7 +5,11 @@ from rich.panel import Panel
 from rich.markdown import Markdown
 from rich.prompt import Prompt
 from rich.table import Table
+from rich.live import Live
+from rich.layout import Layout
+from rich.text import Text
 from typing import Optional, Dict
+from .constants import STREAMING_REFRESH_RATE

 console = Console()

@@ -132,6 +136,131 @@ def print_exchange_pair(
     console.print()


+def stream_exchange_pair(
+    exchange_num: int,
+    agent_for_name: str,
+    agent_for_stream,  # Generator yielding chunks
+    agent_against_name: str,
+    agent_against_stream,  # Generator yielding chunks
+    total_exchanges: int,
+):
+    """
+    Display streaming responses side-by-side with live updates.
+
+    Args:
+        exchange_num: Exchange number
+        agent_for_name: Name of agent arguing FOR
+        agent_for_stream: Generator yielding FOR agent's response chunks
+        agent_against_name: Name of agent arguing AGAINST
+        agent_against_stream: Generator yielding AGAINST agent's response chunks
+        total_exchanges: Total number of exchanges in round
+
+    Returns:
+        Tuple of (complete_for_response, complete_against_response, tokens_per_sec_for, tokens_per_sec_against)
+    """
+    import time
+    from .utils.token_counter import count_tokens
+
+    content_for = []
+    content_against = []
+
+    # Track timing for tokens/second calculation
+    start_time_for = time.time()
+    start_time_against = None
+    end_time_for = None
+    end_time_against = None
+
+    # Create layout with two columns
+    layout = Layout()
+    layout.split_row(
+        Layout(name="left"),
+        Layout(name="right")
+    )
+
+    def update_display(for_text, against_text, for_done=False, against_done=False,
+                       for_tokens_per_sec=None, against_tokens_per_sec=None):
+        """Helper to update the live display."""
+        # Build status indicators
+        for_status = ""
+        if for_done:
+            for_status = " ✓"
+            if for_tokens_per_sec:
+                for_status += f" ({for_tokens_per_sec:.1f} tok/s)"
+        else:
+            for_status = " [dim][streaming...][/dim]"
+
+        against_status = ""
+        if not against_text and not against_done:
+            against_status = " [dim][waiting...][/dim]"
+        elif against_done:
+            against_status = " ✓"
+            if against_tokens_per_sec:
+                against_status += f" ({against_tokens_per_sec:.1f} tok/s)"
+        else:
+            against_status = " [dim][streaming...][/dim]"
+
+        # Left column - FOR agent
+        layout["left"].update(
+            Panel(
+                Markdown(for_text) if for_text else Text("Starting...", style="dim"),
+                title=f"[bold]Exchange {exchange_num}/{total_exchanges}[/bold]\n{agent_for_name} - [green]FOR[/green]{for_status}",
+                border_style="green",
+                padding=(1, 2),
+            )
+        )
+
+        # Right column - AGAINST agent
+        layout["right"].update(
+            Panel(
+                Markdown(against_text) if against_text else Text("Waiting...", style="dim"),
+                title=f"[bold]Exchange {exchange_num}/{total_exchanges}[/bold]\n{agent_against_name} - [red]AGAINST[/red]{against_status}",
+                border_style="red",
+                padding=(1, 2),
+            )
+        )
+
+    with Live(layout, refresh_per_second=STREAMING_REFRESH_RATE, console=console) as live:
+        # Stream Agent FOR first
+        for chunk in agent_for_stream:
+            content_for.append(chunk)
+            update_display(''.join(content_for), '', for_done=False, against_done=False)
+
+        # Mark FOR as complete and calculate tokens/sec
+        end_time_for = time.time()
+        for_duration = end_time_for - start_time_for
+        for_text = ''.join(content_for)
+        for_tokens = count_tokens(for_text)
+        for_tokens_per_sec = for_tokens / for_duration if for_duration > 0 else 0
+
+        update_display(for_text, '', for_done=True, against_done=False,
+                       for_tokens_per_sec=for_tokens_per_sec)
+
+        # Stream Agent AGAINST
+        start_time_against = time.time()
+        for chunk in agent_against_stream:
+            content_against.append(chunk)
+            update_display(for_text, ''.join(content_against),
+                           for_done=True, against_done=False,
+                           for_tokens_per_sec=for_tokens_per_sec)
+
+        # Mark AGAINST as complete and calculate tokens/sec
+        end_time_against = time.time()
+        against_duration = end_time_against - start_time_against
+        against_text = ''.join(content_against)
+        against_tokens = count_tokens(against_text)
+        against_tokens_per_sec = against_tokens / against_duration if against_duration > 0 else 0
+
+        update_display(for_text, against_text,
+                       for_done=True, against_done=True,
+                       for_tokens_per_sec=for_tokens_per_sec,
+                       against_tokens_per_sec=against_tokens_per_sec)
+
+    # After Live context, display remains on screen
+    console.print()
+
+    return for_text, against_text, for_tokens_per_sec, against_tokens_per_sec
+
+
 def print_round_complete(exchange_count: int):
     """
     Print round completion message.
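Usage note: `stream_exchange_pair` only needs two iterables of text chunks, so it can be exercised without any provider at all. A minimal demo (canned chunks, run from the repository root; the sleep simply makes the live update visible):

# Minimal demo of stream_exchange_pair with canned chunks (no LLM involved).
import time
from src import ui

def slow_chunks(words):
    for w in words:
        time.sleep(0.05)          # simulate network latency
        yield w + " "

for_text, against_text, tps_for, tps_against = ui.stream_exchange_pair(
    exchange_num=1,
    agent_for_name="Agent A",
    agent_for_stream=slow_chunks(["Streaming", "makes", "debates", "feel", "live."]),
    agent_against_name="Agent B",
    agent_against_stream=slow_chunks(["Only", "if", "the", "terminal", "is", "wide", "enough."]),
    total_exchanges=1,
)
print(f"FOR: {tps_for:.1f} tok/s, AGAINST: {tps_against:.1f} tok/s")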
tests/test_streaming.py (new file)

"""Tests for streaming functionality."""

import pytest
from unittest.mock import Mock, patch
from src.providers.openrouter import OpenRouterProvider
from src.providers.lmstudio import LMStudioProvider
from src.agent import DebateAgent


def test_openrouter_stream_yields_chunks():
    """Test that OpenRouter streaming yields chunks."""
    # This is a mock test since we can't make real API calls
    provider = OpenRouterProvider(
        model="test-model",
        api_key="test-key"
    )

    # Mock the streaming response
    mock_chunks = [
        Mock(choices=[Mock(delta=Mock(content="Hello "))]),
        Mock(choices=[Mock(delta=Mock(content="world"))]),
        Mock(choices=[Mock(delta=Mock(content="!"))]),
    ]

    with patch.object(provider.client.chat.completions, 'create', return_value=iter(mock_chunks)):
        messages = [{"role": "user", "content": "Test"}]
        chunks = list(provider.generate_response_stream(messages))

    assert chunks == ["Hello ", "world", "!"]


def test_agent_stream_accumulates_response():
    """Test that agent streaming accumulates response in memory."""
    mock_provider = Mock()
    mock_provider.generate_response_stream.return_value = iter(["Hello ", "world", "!"])

    agent = DebateAgent(
        name="Test Agent",
        provider=mock_provider,
        system_prompt="You are a test agent",
    )

    # Stream the response
    chunks = list(agent.generate_response_stream())

    # Check chunks were yielded
    assert chunks == ["Hello ", "world", "!"]

    # Check full response was added to memory
    assert len(agent.memory) == 1
    assert agent.memory[0]["role"] == "assistant"
    assert agent.memory[0]["content"] == "Hello world!"


def test_agent_stream_with_existing_memory():
    """Test streaming with existing conversation memory."""
    mock_provider = Mock()
    mock_provider.generate_response_stream.return_value = iter(["Response"])

    agent = DebateAgent(
        name="Test Agent",
        provider=mock_provider,
        system_prompt="You are a test agent",
    )

    # Add some existing messages
    agent.add_message("user", "First message")
    agent.memory.append({"role": "assistant", "content": "First response"})
    agent.add_message("user", "Second message")

    # Stream the response
    list(agent.generate_response_stream())

    # Check memory has all messages
    assert len(agent.memory) == 4
    assert agent.memory[-1]["role"] == "assistant"
    assert agent.memory[-1]["content"] == "Response"


def test_streaming_vs_non_streaming_same_result():
    """Test that streaming and non-streaming produce the same result."""
    mock_provider = Mock()

    # Set up mock for both methods
    mock_provider.generate_response.return_value = "Complete response"
    mock_provider.generate_response_stream.return_value = iter(["Complete ", "response"])

    agent1 = DebateAgent("Agent1", mock_provider, "Prompt")
    agent2 = DebateAgent("Agent2", mock_provider, "Prompt")

    # Non-streaming
    response1 = agent1.generate_response()

    # Streaming
    chunks = list(agent2.generate_response_stream())
    response2 = ''.join(chunks)

    # Both should produce same text
    assert response1 == "Complete response"
    assert response2 == "Complete response"


def test_empty_stream_handling():
    """Test handling of empty streams."""
    mock_provider = Mock()
    mock_provider.generate_response_stream.return_value = iter([])

    agent = DebateAgent(
        name="Test Agent",
        provider=mock_provider,
        system_prompt="You are a test agent",
    )

    # Stream the response
    chunks = list(agent.generate_response_stream())

    # Should handle empty stream
    assert chunks == []
    assert len(agent.memory) == 1
    assert agent.memory[0]["content"] == ""


def test_stream_with_none_chunks():
    """Test that None deltas from the API are filtered out by the provider."""
    provider = OpenRouterProvider(
        model="test-model",
        api_key="test-key"
    )

    # OpenRouter-style chunks where some deltas carry no content
    mock_chunks = [
        Mock(choices=[Mock(delta=Mock(content="Hello"))]),
        Mock(choices=[Mock(delta=Mock(content=None))]),   # should be filtered
        Mock(choices=[Mock(delta=Mock(content=" world"))]),
        Mock(choices=[Mock(delta=Mock(content=None))]),   # should be filtered
        Mock(choices=[Mock(delta=Mock(content="!"))]),
    ]

    with patch.object(provider.client.chat.completions, 'create', return_value=iter(mock_chunks)):
        messages = [{"role": "user", "content": "Test"}]
        chunks = list(provider.generate_response_stream(messages))

    # None deltas should not appear in the yielded chunks
    assert chunks == ["Hello", " world", "!"]
    assert ''.join(chunks) == "Hello world!"
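All provider calls in these tests are mocked, so they should run offline; assuming pytest is installed, running `pytest tests/test_streaming.py -v` from the repository root is enough to exercise just the streaming suite.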