feat: add autoqa (#5779)

* feat: add autoqa

* chore: add auto start computer_server

* chore: add ci autoqa windows

* chore: add ci support for both windows and linux

* chore: add ci support for macos

* chore: refactor auto qa

* chore: refactor autoqa workflow

* chore: fix upload turn
hiento09 2025-07-18 15:22:31 +07:00 committed by GitHub
parent a56e58f69b
commit 4d44f4324d
28 changed files with 3257 additions and 1 deletion

.github/workflows/autoqa.yml vendored Normal file

@@ -0,0 +1,37 @@
name: Manual trigger AutoQA Test Runner
on:
workflow_dispatch:
inputs:
jan_app_url_windows:
description: 'URL to download Jan app for Windows (.exe)'
required: true
type: string
default: 'https://delta.jan.ai/nightly/Jan-nightly_0.6.5-758_x64-setup.exe'
jan_app_url_ubuntu:
description: 'URL to download Jan app for Ubuntu (.deb)'
required: true
type: string
default: 'https://delta.jan.ai/nightly/Jan-nightly_0.6.5-758_amd64.deb'
jan_app_url_macos:
description: 'URL to download Jan app for macOS (.dmg)'
required: true
type: string
default: 'https://delta.jan.ai/nightly/Jan-nightly_0.6.5-758_universal.dmg'
is_nightly:
description: 'Is this a nightly build?'
required: true
type: boolean
default: true
jobs:
call-autoqa-template:
uses: ./.github/workflows/autoqa-template.yml
with:
jan_app_windows_source: ${{ inputs.jan_app_url_windows }}
jan_app_ubuntu_source: ${{ inputs.jan_app_url_ubuntu }}
jan_app_macos_source: ${{ inputs.jan_app_url_macos }}
is_nightly: ${{ inputs.is_nightly }}
source_type: 'url'
secrets:
RP_TOKEN: ${{ secrets.RP_TOKEN }}

.github/workflows/autoqa-template.yml vendored Normal file

@@ -0,0 +1,396 @@
name: Auto QA Test Runner Template
on:
workflow_call:
inputs:
jan_app_windows_source:
description: 'Windows app source - can be URL or local path'
required: true
type: string
jan_app_ubuntu_source:
description: 'Ubuntu app source - can be URL or local path'
required: true
type: string
jan_app_macos_source:
description: 'macOS app source - can be URL or local path'
required: true
type: string
is_nightly:
description: 'Is this a nightly build?'
required: true
type: boolean
default: true
source_type:
description: 'Source type: url or local'
required: true
type: string
default: 'url'
artifact_name_windows:
description: 'Windows artifact name (only needed for local)'
required: false
type: string
default: ''
artifact_name_ubuntu:
description: 'Ubuntu artifact name (only needed for local)'
required: false
type: string
default: ''
artifact_name_macos:
description: 'macOS artifact name (only needed for local)'
required: false
type: string
default: ''
secrets:
RP_TOKEN:
description: 'ReportPortal API token'
required: true
jobs:
windows:
runs-on: windows-11-nvidia-gpu
timeout-minutes: 60
env:
DEFAULT_JAN_APP_URL: 'https://catalog.jan.ai/windows/Jan-nightly_0.6.5-758_x64-setup.exe'
DEFAULT_IS_NIGHTLY: 'true'
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Python 3.13
uses: actions/setup-python@v4
with:
python-version: '3.13'
- name: Download artifact (if source_type is local)
if: inputs.source_type == 'local'
uses: actions/download-artifact@v4
with:
name: ${{ inputs.artifact_name_windows }}
path: ${{ runner.temp }}/windows-artifact
- name: Clean existing Jan installations
shell: powershell
run: |
.\autoqa\scripts\windows_cleanup.ps1 -IsNightly "${{ inputs.is_nightly }}"
- name: Download/Prepare Jan app
shell: powershell
run: |
if ("${{ inputs.source_type }}" -eq "local") {
# Find the exe file in the artifact
$exeFile = Get-ChildItem -Path "${{ runner.temp }}/windows-artifact" -Recurse -Filter "*.exe" | Select-Object -First 1
if ($exeFile) {
Write-Host "✅ Found local installer: $($exeFile.FullName)"
Copy-Item -Path $exeFile.FullName -Destination "$env:TEMP\jan-installer.exe" -Force
Write-Host "✅ Installer copied to: $env:TEMP\jan-installer.exe"
# Don't set JAN_APP_PATH here - let the install script set it to the correct installed app path
echo "IS_NIGHTLY=${{ inputs.is_nightly }}" >> $env:GITHUB_ENV
} else {
Write-Error "❌ No .exe file found in artifact"
exit 1
}
} else {
# Use the existing download script for URLs
.\autoqa\scripts\windows_download.ps1 `
-WorkflowInputUrl "${{ inputs.jan_app_windows_source }}" `
-WorkflowInputIsNightly "${{ inputs.is_nightly }}" `
-RepoVariableUrl "${{ vars.JAN_APP_URL }}" `
-RepoVariableIsNightly "${{ vars.IS_NIGHTLY }}" `
-DefaultUrl "$env:DEFAULT_JAN_APP_URL" `
-DefaultIsNightly "$env:DEFAULT_IS_NIGHTLY"
}
- name: Install Jan app
shell: powershell
run: |
.\autoqa\scripts\windows_install.ps1 -IsNightly "$env:IS_NIGHTLY"
- name: Install Python dependencies
working-directory: autoqa
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: Run Auto QA Tests
working-directory: autoqa
shell: powershell
env:
RP_TOKEN: ${{ secrets.RP_TOKEN }}
ENABLE_REPORTPORTAL: 'true'
RP_ENDPOINT: 'https://reportportal.menlo.ai'
RP_PROJECT: 'default_personal'
MAX_TURNS: '50'
DELAY_BETWEEN_TESTS: '3'
LAUNCH_NAME: 'CI AutoQA Run Windows - ${{ github.run_number }} - ${{ github.ref_name }}'
run: |
.\scripts\run_tests.ps1 -JanAppPath "$env:JAN_APP_PATH" -ProcessName "$env:JAN_PROCESS_NAME" -RpToken "$env:RP_TOKEN"
- name: Cleanup after tests
if: always()
shell: powershell
run: |
.\autoqa\scripts\windows_post_cleanup.ps1 -IsNightly "${{ inputs.is_nightly }}"
ubuntu:
runs-on: ubuntu-22-04-nvidia-gpu
timeout-minutes: 60
env:
DEFAULT_JAN_APP_URL: 'https://delta.jan.ai/nightly/Jan-nightly_0.6.4-728_amd64.deb'
DEFAULT_IS_NIGHTLY: 'true'
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Python 3.13
uses: actions/setup-python@v4
with:
python-version: '3.13'
- name: Download artifact (if source_type is local)
if: inputs.source_type == 'local'
uses: actions/download-artifact@v4
with:
name: ${{ inputs.artifact_name_ubuntu }}
path: ${{ runner.temp }}/ubuntu-artifact
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y \
x11-utils \
python3-tk \
python3-dev \
wmctrl \
xdotool \
libnss3-dev \
libgconf-2-4 \
libxss1 \
libasound2 \
libxtst6 \
libgtk-3-0 \
libgbm-dev \
libxshmfence1 \
libxrandr2 \
libpangocairo-1.0-0 \
libatk1.0-0 \
libcairo-gobject2 \
libgdk-pixbuf2.0-0 \
gnome-screenshot
- name: Setup script permissions
run: |
chmod +x autoqa/scripts/setup_permissions.sh
./autoqa/scripts/setup_permissions.sh
- name: Clean existing Jan installations
run: |
./autoqa/scripts/ubuntu_cleanup.sh
- name: Download/Prepare Jan app
run: |
if [ "${{ inputs.source_type }}" = "local" ]; then
# Find the deb file in the artifact
DEB_FILE=$(find "${{ runner.temp }}/ubuntu-artifact" -name "*.deb" -type f | head -1)
if [ -n "$DEB_FILE" ]; then
echo "✅ Found local installer: $DEB_FILE"
cp "$DEB_FILE" "/tmp/jan-installer.deb"
echo "✅ Installer copied to: /tmp/jan-installer.deb"
echo "JAN_APP_PATH=/tmp/jan-installer.deb" >> $GITHUB_ENV
echo "IS_NIGHTLY=${{ inputs.is_nightly }}" >> $GITHUB_ENV
if [ "${{ inputs.is_nightly }}" = "true" ]; then
echo "JAN_PROCESS_NAME=Jan-nightly" >> $GITHUB_ENV
else
echo "JAN_PROCESS_NAME=Jan" >> $GITHUB_ENV
fi
else
echo "❌ No .deb file found in artifact"
exit 1
fi
else
# Use the existing download script for URLs
./autoqa/scripts/ubuntu_download.sh \
"${{ inputs.jan_app_ubuntu_source }}" \
"${{ inputs.is_nightly }}" \
"${{ vars.JAN_APP_URL_LINUX }}" \
"${{ vars.IS_NIGHTLY }}" \
"$DEFAULT_JAN_APP_URL" \
"$DEFAULT_IS_NIGHTLY"
# Set the correct environment variables for the test runner
echo "JAN_APP_PATH=/tmp/jan-installer.deb" >> $GITHUB_ENV
if [ "${{ inputs.is_nightly }}" = "true" ]; then
echo "JAN_PROCESS_NAME=Jan-nightly" >> $GITHUB_ENV
else
echo "JAN_PROCESS_NAME=Jan" >> $GITHUB_ENV
fi
fi
- name: Install Jan app
run: |
./autoqa/scripts/ubuntu_install.sh "$IS_NIGHTLY"
- name: Install Python dependencies
working-directory: autoqa
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: Run Auto QA Tests
working-directory: autoqa
env:
RP_TOKEN: ${{ secrets.RP_TOKEN }}
ENABLE_REPORTPORTAL: 'true'
RP_ENDPOINT: 'https://reportportal.menlo.ai'
RP_PROJECT: 'default_personal'
MAX_TURNS: '50'
DELAY_BETWEEN_TESTS: '3'
LAUNCH_NAME: 'CI AutoQA Run Ubuntu - ${{ github.run_number }} - ${{ github.ref_name }}'
run: |
./scripts/run_tests.sh "$JAN_APP_PATH" "$JAN_PROCESS_NAME" "$RP_TOKEN" "ubuntu"
- name: Cleanup after tests
if: always()
run: |
./autoqa/scripts/ubuntu_post_cleanup.sh "$IS_NIGHTLY"
macos:
runs-on: macos-selfhosted-15-arm64
timeout-minutes: 60
env:
DEFAULT_JAN_APP_URL: 'https://delta.jan.ai/nightly/Jan-nightly_0.6.4-728_universal.dmg'
DEFAULT_IS_NIGHTLY: 'true'
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Python 3.13
uses: actions/setup-python@v4
with:
python-version: '3.13'
- name: Download artifact (if source_type is local)
if: inputs.source_type == 'local'
uses: actions/download-artifact@v4
with:
name: ${{ inputs.artifact_name_macos }}
path: ${{ runner.temp }}/macos-artifact
- name: Setup script permissions
run: |
chmod +x autoqa/scripts/setup_permissions.sh
./autoqa/scripts/setup_permissions.sh
- name: Clean existing Jan installations
run: |
./autoqa/scripts/macos_cleanup.sh
- name: Download/Prepare Jan app
run: |
if [ "${{ inputs.source_type }}" = "local" ]; then
# Find the dmg file in the artifact
DMG_FILE=$(find "${{ runner.temp }}/macos-artifact" -name "*.dmg" -type f | head -1)
if [ -n "$DMG_FILE" ]; then
echo "✅ Found local installer: $DMG_FILE"
cp "$DMG_FILE" "/tmp/jan-installer.dmg"
echo "✅ Installer copied to: /tmp/jan-installer.dmg"
echo "JAN_APP_PATH=/tmp/jan-installer.dmg" >> $GITHUB_ENV
echo "IS_NIGHTLY=${{ inputs.is_nightly }}" >> $GITHUB_ENV
if [ "${{ inputs.is_nightly }}" = "true" ]; then
echo "PROCESS_NAME=Jan-nightly" >> $GITHUB_ENV
else
echo "PROCESS_NAME=Jan" >> $GITHUB_ENV
fi
else
echo "❌ No .dmg file found in artifact"
exit 1
fi
else
# Use the existing download script for URLs
./autoqa/scripts/macos_download.sh \
"${{ inputs.jan_app_macos_source }}" \
"${{ inputs.is_nightly }}" \
"${{ vars.JAN_APP_URL }}" \
"${{ vars.IS_NIGHTLY }}" \
"$DEFAULT_JAN_APP_URL" \
"$DEFAULT_IS_NIGHTLY"
# Set the correct environment variables for the test runner
echo "JAN_APP_PATH=/tmp/jan-installer.dmg" >> $GITHUB_ENV
if [ "${{ inputs.is_nightly }}" = "true" ]; then
echo "PROCESS_NAME=Jan-nightly" >> $GITHUB_ENV
else
echo "PROCESS_NAME=Jan" >> $GITHUB_ENV
fi
fi
- name: Install Jan app
run: |
./autoqa/scripts/macos_install.sh
- name: Install system dependencies
run: |
echo "Installing system dependencies for macOS..."
# Check if Homebrew is available
if command -v brew >/dev/null 2>&1; then
echo "Homebrew is available"
# Install python-tk if not available
python3 -c "import tkinter" 2>/dev/null || {
echo "Installing python-tk via Homebrew..."
brew install python-tk || true
}
else
echo "Homebrew not available, checking if tkinter works..."
python3 -c "import tkinter" || {
echo "⚠️ tkinter not available and Homebrew not found"
echo "This may cause issues with mouse control"
}
fi
echo "System dependencies check completed"
- name: Install Python dependencies
run: |
cd autoqa
echo "Installing Python dependencies..."
pip install --upgrade pip
pip install -r requirements.txt
echo "✅ Python dependencies installed"
- name: Setup ReportPortal environment
run: |
echo "Setting up ReportPortal environment..."
echo "RP_TOKEN=${{ secrets.RP_TOKEN }}" >> $GITHUB_ENV
echo "ReportPortal environment configured"
- name: Run E2E tests
env:
RP_TOKEN: ${{ secrets.RP_TOKEN }}
ENABLE_REPORTPORTAL: 'true'
RP_ENDPOINT: 'https://reportportal.menlo.ai'
RP_PROJECT: 'default_personal'
MAX_TURNS: '50'
DELAY_BETWEEN_TESTS: '3'
LAUNCH_NAME: 'CI AutoQA Run Macos - ${{ github.run_number }} - ${{ github.ref_name }}'
run: |
cd autoqa
echo "Starting E2E test execution..."
echo "Environment variables:"
echo "JAN_APP_PATH: $JAN_APP_PATH"
echo "PROCESS_NAME: $PROCESS_NAME"
echo "IS_NIGHTLY: $IS_NIGHTLY"
./scripts/run_tests.sh "$JAN_APP_PATH" "$PROCESS_NAME" "$RP_TOKEN" "macos"
- name: Cleanup after tests
if: always()
run: |
./autoqa/scripts/macos_post_cleanup.sh

@@ -223,3 +223,49 @@ jobs:
RUN_ID=${{ github.run_id }}
COMMENT="This is the build for this pull request. You can download it from the Artifacts section here: [Build URL](https://github.com/${{ github.repository }}/actions/runs/${RUN_ID})."
gh pr comment $PR_URL --body "$COMMENT"
# AutoQA trigger for S3 builds
trigger-autoqa-s3:
needs:
[
build-macos,
build-windows-x64,
build-linux-x64,
get-update-version,
set-public-provider,
sync-temp-to-latest,
]
if: needs.set-public-provider.outputs.public_provider == 'aws-s3'
uses: ./.github/workflows/autoqa-template.yml
with:
jan_app_windows_source: 'https://delta.jan.ai/nightly/Jan-nightly_${{ needs.get-update-version.outputs.new_version }}_x64-setup.exe'
jan_app_ubuntu_source: 'https://delta.jan.ai/nightly/Jan-nightly_${{ needs.get-update-version.outputs.new_version }}_amd64.deb'
jan_app_macos_source: 'https://delta.jan.ai/nightly/Jan-nightly_${{ needs.get-update-version.outputs.new_version }}_universal.dmg'
is_nightly: true
source_type: 'url'
secrets:
RP_TOKEN: ${{ secrets.RP_TOKEN }}
# AutoQA trigger for artifact builds
trigger-autoqa-artifacts:
needs:
[
build-macos,
build-windows-x64,
build-linux-x64,
get-update-version,
set-public-provider,
]
if: needs.set-public-provider.outputs.public_provider == 'none'
uses: ./.github/workflows/autoqa-template.yml
with:
jan_app_windows_source: '' # Not needed for artifacts
jan_app_ubuntu_source: '' # Not needed for artifacts
jan_app_macos_source: '' # Not needed for artifacts
is_nightly: true
source_type: 'local'
artifact_name_windows: 'jan-windows-${{ needs.get-update-version.outputs.new_version }}'
artifact_name_ubuntu: 'jan-linux-amd64-${{ needs.get-update-version.outputs.new_version }}-deb'
artifact_name_macos: 'jan-nightly-mac-universal-${{ needs.get-update-version.outputs.new_version }}.dmg'
secrets:
RP_TOKEN: ${{ secrets.RP_TOKEN }}

.gitignore vendored

@@ -50,4 +50,9 @@ src-tauri/resources/bin
.opencode
OpenCode.md
archive/
.cache/
# auto qa
autoqa/trajectories
autoqa/recordings
autoqa/__pycache__

autoqa/README.md Normal file

@@ -0,0 +1,319 @@
# E2E Test Runner with ReportPortal Integration
🚀 An automated end-to-end test runner for the Jan application with ReportPortal integration, screen recording, and comprehensive test monitoring.
## Features
- ✅ **Automated Jan App Testing**: Automatically starts and stops the Jan application
- 🖥️ **Auto Computer Server**: Automatically starts the computer server in the background
- 📹 **Screen Recording**: Records test execution for debugging
- 📊 **ReportPortal Integration**: Optional upload of test results to ReportPortal
- 🔄 **Turn Monitoring**: Prevents infinite loops with configurable turn limits
- 🎯 **Flexible Configuration**: Command-line arguments and environment variables
- 🌐 **Cross-platform**: Windows, macOS, and Linux support
- 📁 **Test Discovery**: Automatically scans a directory for test files
## Prerequisites
- Python 3.8+
- Jan application installed
- Windows Sandbox (for computer provider)
- Computer server package installed
- Required Python packages (see requirements.txt)
## Installation
1. Clone the repository:
```bash
git clone <repository-url>
cd autoqa
```
2. Install dependencies:
```bash
# For Windows and Linux
pip install -r requirements.txt
```
3. Ensure the Jan application is installed in one of the default locations:
- Windows: `%LOCALAPPDATA%\Programs\jan\Jan.exe`
- macOS: `~/Applications/Jan.app/Contents/MacOS/Jan`
- Linux: `jan` (in PATH)
## Quick Start
### Local Development (No ReportPortal)
```bash
# Run all tests in ./tests directory (auto-starts computer server)
python main.py
# Run with custom test directory
python main.py --tests-dir "my_tests"
# Run with custom Jan app path
python main.py --jan-app-path "C:/Custom/Path/Jan.exe"
# Skip auto computer server start (if already running)
python main.py --skip-server-start
```
### With ReportPortal Integration
```bash
# Enable ReportPortal with token
python main.py --enable-reportportal --rp-token "YOUR_API_TOKEN"
# Full ReportPortal configuration
python main.py \
--enable-reportportal \
--rp-endpoint "https://reportportal.example.com" \
--rp-project "my_project" \
--rp-token "YOUR_API_TOKEN"
```
## Configuration
### Command Line Arguments
| Argument | Environment Variable | Default | Description |
| ----------------------- | --------------------- | ------------------------------- | ------------------------------------------------- |
| **Computer Server** |
| `--skip-server-start` | `SKIP_SERVER_START` | `false` | Skip automatic computer server startup |
| **ReportPortal** |
| `--enable-reportportal` | `ENABLE_REPORTPORTAL` | `false` | Enable ReportPortal integration |
| `--rp-endpoint` | `RP_ENDPOINT` | `https://reportportal.menlo.ai` | ReportPortal endpoint URL |
| `--rp-project` | `RP_PROJECT` | `default_personal` | ReportPortal project name |
| `--rp-token` | `RP_TOKEN` | - | ReportPortal API token (required when RP enabled) |
| **Jan Application** |
| `--jan-app-path` | `JAN_APP_PATH` | _auto-detected_ | Path to Jan application executable |
| `--jan-process-name` | `JAN_PROCESS_NAME` | _platform-specific_ | Jan process name for monitoring |
| **Model Configuration** |
| `--model-name` | `MODEL_NAME` | `ByteDance-Seed/UI-TARS-1.5-7B` | AI model name |
| `--model-base-url` | `MODEL_BASE_URL` | `http://10.200.108.58:1234/v1` | Model API endpoint |
| `--model-provider` | `MODEL_PROVIDER` | `oaicompat` | Model provider type |
| `--model-loop` | `MODEL_LOOP` | `uitars` | Agent loop type |
| **Test Execution** |
| `--max-turns` | `MAX_TURNS` | `30` | Maximum turns per test |
| `--tests-dir` | `TESTS_DIR` | `tests` | Directory containing test files |
| `--delay-between-tests` | `DELAY_BETWEEN_TESTS` | `3` | Delay between tests (seconds) |
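Note that the environment variables only seed the defaults in `parse_arguments()`, so an explicit flag always wins when both are set:

```bash
# MAX_TURNS seeds the default, but the explicit flag takes precedence: runs with 60 turns
MAX_TURNS=50 python main.py --max-turns 60
```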
### Environment Variables
Create a `.env` file or set environment variables:
```bash
# Computer Server
SKIP_SERVER_START=false
# ReportPortal Configuration
ENABLE_REPORTPORTAL=true
RP_ENDPOINT=https://reportportal.example.com
RP_PROJECT=my_project
RP_TOKEN=your_secret_token
# Jan Application
JAN_APP_PATH=C:\Custom\Path\Jan.exe
JAN_PROCESS_NAME=Jan.exe
# Model Configuration
MODEL_NAME=gpt-4
MODEL_BASE_URL=https://api.openai.com/v1
MODEL_PROVIDER=openai
MODEL_LOOP=uitars
# Test Settings
MAX_TURNS=50
TESTS_DIR=e2e_tests
DELAY_BETWEEN_TESTS=5
```
## Test Structure
### Test Files
- Test files should be `.txt` files containing test prompts
- Place test files in the `tests/` directory (or custom directory)
- Support nested directories for organization
Example test file (`tests/basic/login_test.txt`):
```
Test the login functionality of Jan application.
Navigate to login screen, enter valid credentials, and verify successful login.
```
### Directory Structure
```
autoqa/
├── main.py # Main test runner
├── utils.py # Jan app utilities
├── test_runner.py # Test execution logic
├── screen_recorder.py # Screen recording functionality
├── reportportal_handler.py # ReportPortal integration
├── tests/ # Test files directory
│ ├── basic/
│ │ ├── login_test.txt
│ │ └── navigation_test.txt
│ └── advanced/
│ └── complex_workflow.txt
├── recordings/ # Screen recordings (auto-created)
├── trajectories/ # Agent trajectories (auto-created)
└── README.md
```
## Usage Examples
### Basic Usage
```bash
# Run all tests locally (auto-starts computer server)
python main.py
# Get help
python main.py --help
# Run without auto-starting computer server
python main.py --skip-server-start
```
### Advanced Usage
```bash
# Custom configuration
python main.py \
--tests-dir "integration_tests" \
--max-turns 40 \
--delay-between-tests 10 \
--model-name "gpt-4"
# Environment + Arguments
ENABLE_REPORTPORTAL=true RP_TOKEN=secret python main.py --max-turns 50
# Different model provider
python main.py \
--model-provider "openai" \
--model-name "gpt-4" \
--model-base-url "https://api.openai.com/v1"
# External computer server (skip auto-start)
SKIP_SERVER_START=true python main.py
```
### CI/CD Usage
```bash
# GitHub Actions / CI environment
ENABLE_REPORTPORTAL=true \
RP_TOKEN=${{ secrets.RP_TOKEN }} \
MODEL_NAME=production-model \
MAX_TURNS=40 \
SKIP_SERVER_START=false \
python main.py
```
## Computer Server Management
The test runner automatically manages the computer server:
### Automatic Server Management (Default)
- **Auto-start**: Computer server starts automatically in background thread
- **Auto-cleanup**: Server stops when main program exits (daemon thread)
- **Error handling**: Graceful fallback if server fails to start
### Manual Server Management
```bash
# If you prefer to manage computer server manually:
python -m computer_server # In separate terminal
# Then run tests without auto-start:
python main.py --skip-server-start
```
### Server Logs
```
2025-07-15 15:30:45 - INFO - Starting computer server in background...
2025-07-15 15:30:45 - INFO - Calling computer_server.run_cli()...
2025-07-15 15:30:45 - INFO - Computer server thread started
2025-07-15 15:30:50 - INFO - Computer server is running successfully
```
## Output
### Local Development
- **Console logs**: Detailed execution information
- **Screen recordings**: Saved to `recordings/` directory as MP4 files
- **Trajectories**: Agent interaction data in `trajectories/` directory
- **Local results**: Test results logged to console
### ReportPortal Integration
When enabled, results are uploaded to ReportPortal including:
- Test execution status (PASSED/FAILED)
- Screen recordings as attachments
- Detailed turn-by-turn interaction logs
- Error messages and debugging information
## Troubleshooting
### Common Issues
1. **Computer server startup failed**:
```bash
# Install required dependencies
pip install computer_server
# Check if computer_server is available
python -c "import computer_server; print('OK')"
# Use manual server if auto-start fails
python main.py --skip-server-start
```
2. **Jan app not found**:
```bash
# Specify custom path
python main.py --jan-app-path "D:/Apps/Jan/Jan.exe"
```
3. **Windows dependencies missing**:
```bash
# Install Windows-specific packages
pip install pywin32 psutil
```
4. **ReportPortal connection failed**:
- Verify endpoint URL and token
- Check network connectivity
- Ensure project exists
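As a first step, a quick reachability check can rule out network problems (the endpoint below is a placeholder):
```bash
curl -I https://reportportal.example.com
```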
5. **Screen recording issues**:
- Check disk space in `recordings/` directory
- Verify screen recording permissions
6. **Test timeouts**:
```bash
# Increase turn limit
python main.py --max-turns 50
```
### Debug Mode
Enable detailed logging by modifying the logging level in `main.py`:
```python
logging.basicConfig(level=logging.DEBUG)
```

autoqa/main.py Normal file

@@ -0,0 +1,514 @@
import asyncio
import logging
import os
import argparse
import threading
import time
import platform
from datetime import datetime
from computer import Computer
from reportportal_client import RPClient
from reportportal_client.helpers import timestamp
from utils import scan_test_files
from test_runner import run_single_test_with_timeout
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
logging.StreamHandler()
]
)
logger = logging.getLogger(__name__)
# Platform detection
IS_WINDOWS = platform.system() == "Windows"
IS_LINUX = platform.system() == "Linux"
IS_MACOS = platform.system() == "Darwin"
def get_computer_config():
"""Get computer configuration based on platform"""
if IS_WINDOWS:
return {
"os_type": "windows"
}
elif IS_LINUX:
return {
"os_type": "linux"
}
elif IS_MACOS:
return {
"os_type": "macos"
}
else:
# Default fallback
logger.warning(f"Unknown platform {platform.system()}, using Linux config as fallback")
return {
"os_type": "linux"
}
def get_default_jan_path():
"""Get default Jan app path based on OS"""
if IS_WINDOWS:
# Try multiple common locations on Windows
possible_paths = [
os.path.expanduser(r"~\AppData\Local\Programs\jan\Jan.exe"),
os.path.join(os.environ.get('LOCALAPPDATA', ''), 'Programs', 'jan', 'Jan.exe'),
os.path.join(os.environ.get('APPDATA', ''), 'jan', 'Jan.exe'),
r"C:\Program Files\jan\Jan.exe",
r"C:\Program Files (x86)\jan\Jan.exe"
]
# Return first existing path, or first option as default
for path in possible_paths:
if os.path.exists(path):
return path
# If none exist, return the most likely default
return possible_paths[0]
elif IS_LINUX:
# Linux possible locations
possible_paths = [
"/usr/bin/Jan",
"/usr/local/bin/Jan",
os.path.expanduser("~/Applications/Jan/Jan"),
"/opt/Jan/Jan"
]
# Return first existing path, or first option as default
for path in possible_paths:
if os.path.exists(path):
return path
# Fall back to the most common install path
return "/usr/bin/Jan"
elif IS_MACOS:
# macOS defaults
possible_paths = [
"/Applications/Jan.app/Contents/MacOS/Jan",
os.path.expanduser("~/Applications/Jan.app/Contents/MacOS/Jan")
]
for path in possible_paths:
if os.path.exists(path):
return path
return possible_paths[0]
else:
# Unknown platform
return "jan"
def start_computer_server():
"""Start computer server in background thread"""
try:
logger.info("Starting computer server in background...")
# Import computer_server module
import computer_server
import sys
# Start server in a separate thread
def run_server():
try:
# Save original sys.argv to avoid argument conflicts
original_argv = sys.argv.copy()
# Override sys.argv for computer_server to use default args
sys.argv = ['computer_server'] # Reset to minimal args
# Use the proper entry point
logger.info("Calling computer_server.run_cli()...")
computer_server.run_cli()
logger.info("Computer server.run_cli() completed")
except KeyboardInterrupt:
logger.info("Computer server interrupted")
except Exception as e:
logger.error(f"Computer server error: {e}")
import traceback
logger.error(f"Traceback: {traceback.format_exc()}")
finally:
# Restore original sys.argv
try:
sys.argv = original_argv
except:
pass
server_thread = threading.Thread(target=run_server, daemon=True)
server_thread.start()
logger.info("Computer server thread started")
# Give server more time to start up
time.sleep(5)
# Check if thread is still alive (server is running)
if server_thread.is_alive():
logger.info("Computer server is running successfully")
return server_thread
else:
logger.error("Computer server thread died unexpectedly")
return None
except ImportError as e:
logger.error(f"Cannot import computer_server module: {e}")
logger.error("Please install computer_server package")
return None
except Exception as e:
logger.error(f"Error starting computer server: {e}")
import traceback
logger.error(f"Traceback: {traceback.format_exc()}")
return None
def parse_arguments():
"""Parse command line arguments"""
parser = argparse.ArgumentParser(
description="E2E Test Runner with ReportPortal integration",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Run locally without ReportPortal
python main.py
# Run with ReportPortal integration
python main.py --enable-reportportal --rp-token YOUR_TOKEN
# Run with custom Jan app path
python main.py --jan-app-path "C:/Custom/Path/Jan.exe"
# Run with different model
python main.py --model-name "gpt-4" --model-base-url "https://api.openai.com/v1"
# Using environment variables
ENABLE_REPORTPORTAL=true RP_TOKEN=xxx MODEL_NAME=gpt-4 python main.py
"""
)
# Get default Jan path
default_jan_path = get_default_jan_path()
# Computer server arguments
server_group = parser.add_argument_group('Computer Server Configuration')
server_group.add_argument(
'--skip-server-start',
action='store_true',
default=os.getenv('SKIP_SERVER_START', 'false').lower() == 'true',
help='Skip automatic computer server startup (env: SKIP_SERVER_START, default: false)'
)
# ReportPortal arguments
rp_group = parser.add_argument_group('ReportPortal Configuration')
rp_group.add_argument(
'--enable-reportportal',
action='store_true',
default=os.getenv('ENABLE_REPORTPORTAL', 'false').lower() == 'true',
help='Enable ReportPortal integration (env: ENABLE_REPORTPORTAL, default: false)'
)
rp_group.add_argument(
'--rp-endpoint',
default=os.getenv('RP_ENDPOINT', 'https://reportportal.menlo.ai'),
help='ReportPortal endpoint URL (env: RP_ENDPOINT, default: %(default)s)'
)
rp_group.add_argument(
'--rp-project',
default=os.getenv('RP_PROJECT', 'default_personal'),
help='ReportPortal project name (env: RP_PROJECT, default: %(default)s)'
)
rp_group.add_argument(
'--rp-token',
default=os.getenv('RP_TOKEN'),
help='ReportPortal API token (env: RP_TOKEN, required when --enable-reportportal is used)'
)
rp_group.add_argument(
'--launch-name',
default=os.getenv('LAUNCH_NAME'),
help='Custom launch name for ReportPortal (env: LAUNCH_NAME, default: auto-generated with timestamp)'
)
# Jan app arguments
jan_group = parser.add_argument_group('Jan Application Configuration')
jan_group.add_argument(
'--jan-app-path',
default=os.getenv('JAN_APP_PATH', default_jan_path),
help=f'Path to Jan application executable (env: JAN_APP_PATH, default: auto-detected or {default_jan_path})'
)
jan_group.add_argument(
'--jan-process-name',
default=os.getenv('JAN_PROCESS_NAME', 'Jan.exe' if IS_WINDOWS else ('Jan' if IS_MACOS else 'Jan-nightly')),
help='Jan process name for monitoring (env: JAN_PROCESS_NAME, default: platform-specific)'
)
# Model/Agent arguments
model_group = parser.add_argument_group('Model Configuration')
model_group.add_argument(
'--model-loop',
default=os.getenv('MODEL_LOOP', 'uitars'),
help='Agent loop type (env: MODEL_LOOP, default: %(default)s)'
)
model_group.add_argument(
'--model-provider',
default=os.getenv('MODEL_PROVIDER', 'oaicompat'),
help='Model provider (env: MODEL_PROVIDER, default: %(default)s)'
)
model_group.add_argument(
'--model-name',
default=os.getenv('MODEL_NAME', 'ByteDance-Seed/UI-TARS-1.5-7B'),
help='Model name (env: MODEL_NAME, default: %(default)s)'
)
model_group.add_argument(
'--model-base-url',
default=os.getenv('MODEL_BASE_URL', 'http://10.200.108.58:1234/v1'),
help='Model base URL (env: MODEL_BASE_URL, default: %(default)s)'
)
# Test execution arguments
test_group = parser.add_argument_group('Test Execution Configuration')
test_group.add_argument(
'--max-turns',
type=int,
default=int(os.getenv('MAX_TURNS', '30')),
help='Maximum number of turns per test (env: MAX_TURNS, default: %(default)s)'
)
test_group.add_argument(
'--tests-dir',
default=os.getenv('TESTS_DIR', 'tests'),
help='Directory containing test files (env: TESTS_DIR, default: %(default)s)'
)
test_group.add_argument(
'--delay-between-tests',
type=int,
default=int(os.getenv('DELAY_BETWEEN_TESTS', '3')),
help='Delay in seconds between tests (env: DELAY_BETWEEN_TESTS, default: %(default)s)'
)
args = parser.parse_args()
# Validate ReportPortal token if ReportPortal is enabled
if args.enable_reportportal and not args.rp_token:
parser.error("--rp-token (or RP_TOKEN env var) is required when --enable-reportportal is used")
return args
async def main():
"""
Main function to scan and run all test files with optional ReportPortal integration
"""
# Parse command line arguments
args = parse_arguments()
# Initialize final exit code
final_exit_code = 0
# Start computer server if not skipped
server_thread = None
if not args.skip_server_start:
server_thread = start_computer_server()
if server_thread is None:
logger.error("Failed to start computer server. Exiting...")
exit(1)
else:
logger.info("Skipping computer server startup (assuming it's already running)")
try:
# Build agent config from arguments
agent_config = {
"loop": args.model_loop,
"model_provider": args.model_provider,
"model_name": args.model_name,
"model_base_url": args.model_base_url
}
# Log configuration
logger.info("=== Configuration ===")
logger.info(f"Computer server: {'STARTED' if server_thread else 'EXTERNAL'}")
logger.info(f"Tests directory: {args.tests_dir}")
logger.info(f"Max turns per test: {args.max_turns}")
logger.info(f"Delay between tests: {args.delay_between_tests}s")
logger.info(f"Jan app path: {args.jan_app_path}")
logger.info(f"Jan app exists: {os.path.exists(args.jan_app_path)}")
logger.info(f"Jan process name: {args.jan_process_name}")
logger.info(f"Model: {args.model_name}")
logger.info(f"Model URL: {args.model_base_url}")
logger.info(f"Model provider: {args.model_provider}")
logger.info(f"ReportPortal integration: {'ENABLED' if args.enable_reportportal else 'DISABLED'}")
if args.enable_reportportal:
logger.info(f"ReportPortal endpoint: {args.rp_endpoint}")
logger.info(f"ReportPortal project: {args.rp_project}")
logger.info(f"ReportPortal token: {'SET' if args.rp_token else 'NOT SET'}")
logger.info(f"Launch name: {args.launch_name if args.launch_name else 'AUTO-GENERATED'}")
logger.info("======================")
# Scan all test files
test_files = scan_test_files(args.tests_dir)
if not test_files:
logger.warning(f"No test files found in directory: {args.tests_dir}")
return
logger.info(f"Found {len(test_files)} test files")
# Track test results for final exit code
test_results = {"passed": 0, "failed": 0, "total": len(test_files)}
# Initialize ReportPortal client only if enabled
rp_client = None
launch_id = None
if args.enable_reportportal:
try:
rp_client = RPClient(
endpoint=args.rp_endpoint,
project=args.rp_project,
api_key=args.rp_token
)
# Start ReportPortal launch
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
# Use custom launch name if provided, otherwise generate default
if args.launch_name:
launch_name = args.launch_name
logger.info(f"Using custom launch name: {launch_name}")
else:
launch_name = f"E2E Test Run - {current_time}"
logger.info(f"Using auto-generated launch name: {launch_name}")
launch_id = rp_client.start_launch(
name=launch_name,
start_time=timestamp(),
description=f"Automated E2E test run with {len(test_files)} test cases\n"
f"Model: {args.model_name}\n"
f"Max turns: {args.max_turns}"
)
logger.info(f"Started ReportPortal launch: {launch_name}")
except Exception as e:
logger.error(f"Failed to initialize ReportPortal: {e}")
logger.warning("Continuing without ReportPortal integration...")
rp_client = None
launch_id = None
else:
logger.info("Running in local development mode - results will not be uploaded to ReportPortal")
# Start computer environment
logger.info("Initializing computer environment...")
# Get platform-specific computer configuration
computer_config = get_computer_config()
logger.info(f"Using computer config: {computer_config}")
computer = Computer(
os_type=computer_config["os_type"],
use_host_computer_server=True
)
await computer.run()
logger.info("Computer environment ready")
# Run each test sequentially with turn monitoring
for i, test_data in enumerate(test_files, 1):
logger.info(f"Running test {i}/{len(test_files)}: {test_data['path']}")
try:
# Pass all configs to test runner
test_result = await run_single_test_with_timeout(
computer=computer,
test_data=test_data,
rp_client=rp_client, # Can be None
launch_id=launch_id, # Can be None
max_turns=args.max_turns,
jan_app_path=args.jan_app_path,
jan_process_name=args.jan_process_name,
agent_config=agent_config,
enable_reportportal=args.enable_reportportal
)
# Track test result - properly handle different return formats
test_passed = False
if test_result:
# Check different possible return formats
if isinstance(test_result, dict):
# Dictionary format: check 'success' key
test_passed = test_result.get('success', False)
elif isinstance(test_result, bool):
# Boolean format: direct boolean value
test_passed = test_result
elif hasattr(test_result, 'success'):
# Object format: check success attribute
test_passed = getattr(test_result, 'success', False)
else:
# Any truthy value is considered success
test_passed = bool(test_result)
else:
test_passed = False
# Update counters and log result
if test_passed:
test_results["passed"] += 1
logger.info(f"✅ Test {i} PASSED: {test_data['path']}")
else:
test_results["failed"] += 1
logger.error(f"❌ Test {i} FAILED: {test_data['path']}")
# Debug log for troubleshooting
logger.info(f"🔍 Debug - Test result: type={type(test_result)}, value={test_result}, success_field={test_result.get('success', 'N/A') if isinstance(test_result, dict) else 'N/A'}, final_passed={test_passed}")
except Exception as e:
test_results["failed"] += 1
logger.error(f"❌ Test {i} FAILED with exception: {test_data['path']} - {e}")
# Add delay between tests
if i < len(test_files):
logger.info(f"Waiting {args.delay_between_tests} seconds before next test...")
await asyncio.sleep(args.delay_between_tests)
# Log final test results summary
logger.info("=" * 50)
logger.info("TEST EXECUTION SUMMARY")
logger.info("=" * 50)
logger.info(f"Total tests: {test_results['total']}")
logger.info(f"Passed: {test_results['passed']}")
logger.info(f"Failed: {test_results['failed']}")
logger.info(f"Success rate: {(test_results['passed']/test_results['total']*100):.1f}%")
logger.info("=" * 50)
if test_results["failed"] > 0:
logger.error(f"❌ Test execution completed with {test_results['failed']} failures!")
final_exit_code = 1
else:
logger.info("✅ All tests completed successfully!")
final_exit_code = 0
except KeyboardInterrupt:
logger.info("Test execution interrupted by user")
final_exit_code = 1
except Exception as e:
logger.error(f"Error in main execution: {e}")
final_exit_code = 1
finally:
# Finish ReportPortal launch only if it was started
if args.enable_reportportal and rp_client and launch_id:
try:
rp_client.finish_launch(
launch_id=launch_id,
end_time=timestamp()
)
rp_client.session.close()
logger.info("ReportPortal launch finished and session closed")
except Exception as e:
logger.error(f"Error finishing ReportPortal launch: {e}")
# Note: daemon thread will automatically terminate when main program ends
if server_thread:
logger.info("Computer server will stop when main program exits (daemon thread)")
# Exit with appropriate code based on test results
logger.info(f"Exiting with code: {final_exit_code}")
exit(final_exit_code)
if __name__ == "__main__":
asyncio.run(main())

autoqa/reportportal_handler.py Normal file

@@ -0,0 +1,307 @@
import os
import json
import mimetypes
import re
import logging
from reportportal_client.helpers import timestamp
logger = logging.getLogger(__name__)
def upload_turn_folder(client, test_item_id, turn_path, turn_name, force_fail=False):
"""
Upload turn folder content to ReportPortal
"""
step_item_id = client.start_test_item(
parent_item_id=test_item_id,
name=turn_name,
start_time=timestamp(),
item_type="STEP"
)
uploaded = False
step_has_errors = False # Track if this step has any errors
for fname in sorted(os.listdir(turn_path)):
fpath = os.path.join(turn_path, fname)
if fname.endswith(".json"):
try:
with open(fpath, "r", encoding="utf-8") as f:
data = json.load(f)
client.log(
time=timestamp(),
level="INFO",
message=f"[{fname}]\n{json.dumps(data, indent=2)}",
item_id=step_item_id
)
uploaded = True
except Exception as e:
client.log(
time=timestamp(),
level="ERROR",
message=f"[ERROR parsing {fname}] {str(e)}",
item_id=step_item_id
)
step_has_errors = True
elif fname.endswith(".png"):
try:
with open(fpath, "rb") as img_file:
client.log(
time=timestamp(),
level="INFO",
message=f"Screenshot: {fname}",
item_id=step_item_id,
attachment={
"name": fname,
"data": img_file.read(),
"mime": mimetypes.guess_type(fname)[0] or "image/png"
}
)
uploaded = True
except Exception as e:
client.log(
time=timestamp(),
level="ERROR",
message=f"[ERROR attaching {fname}] {str(e)}",
item_id=step_item_id
)
step_has_errors = True
if not uploaded:
client.log(
time=timestamp(),
level="WARNING",
message="No data found in this turn.",
item_id=step_item_id
)
# Determine step status based on test case result
if force_fail:
step_status = "FAILED"
else:
step_status = "FAILED" if step_has_errors else "PASSED"
client.finish_test_item(
item_id=step_item_id,
end_time=timestamp(),
status=step_status
)
def extract_test_result_from_trajectory(trajectory_dir):
"""
Extract test result from the last turn's API response
Returns True only if found {"result": True}, False for all other cases including {"result": False}
"""
if not trajectory_dir or not os.path.exists(trajectory_dir):
logger.warning(f"Trajectory directory not found: {trajectory_dir}")
return False
try:
# Get all turn folders and find the last one
turn_folders = [f for f in os.listdir(trajectory_dir)
if os.path.isdir(os.path.join(trajectory_dir, f)) and f.startswith("turn_")]
if not turn_folders:
logger.warning("No turn folders found")
return False
# Sort to get the last turn
last_turn = sorted(turn_folders)[-1]
last_turn_path = os.path.join(trajectory_dir, last_turn)
logger.info(f"Checking result in last turn: {last_turn}")
# Look for API call response files
response_files = [f for f in os.listdir(last_turn_path)
if f.startswith("api_call_") and f.endswith("_response.json")]
if not response_files:
logger.warning("No API response files found in last turn")
return False
# Check the last response file
last_response_file = sorted(response_files)[-1]
response_file_path = os.path.join(last_turn_path, last_response_file)
logger.info(f"Checking response file: {last_response_file}")
with open(response_file_path, 'r', encoding='utf-8') as f:
data = json.load(f)
# Extract content from response
if 'response' in data and 'choices' in data['response'] and data['response']['choices']:
last_choice = data['response']['choices'][-1]
if 'message' in last_choice and 'content' in last_choice['message']:
content = last_choice['message']['content']
logger.info(f"Last response content: {content}")
# Look for result patterns - need to check both True and False
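# Note: these patterns deliberately match Python-style capitalized True/False in the response text, not JSON true/false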
true_pattern = r'\{\s*"result"\s*:\s*True\s*\}'
false_pattern = r'\{\s*"result"\s*:\s*False\s*\}'
true_match = re.search(true_pattern, content)
false_match = re.search(false_pattern, content)
if true_match:
logger.info(f"Found test result: True - PASSED")
return True
elif false_match:
logger.info(f"Found test result: False - FAILED")
return False
else:
logger.warning("No valid result pattern found in response content - marking as FAILED")
return False
logger.warning("Could not extract content from response structure")
return False
except Exception as e:
logger.error(f"Error extracting test result: {e}")
return False
def upload_test_results_to_rp(client, launch_id, test_path, trajectory_dir, force_stopped=False, video_path=None):
"""
Upload test results to ReportPortal with proper status based on test result
"""
if not trajectory_dir or not os.path.exists(trajectory_dir):
logger.warning(f"Trajectory directory not found: {trajectory_dir}")
formatted_test_path = test_path.replace('\\', '/').replace('.txt', '').replace('/', '__')
test_item_id = client.start_test_item(
launch_id=launch_id,
name=formatted_test_path,
start_time=timestamp(),
item_type="TEST",
description=f"Test case from: {test_path}"
)
client.log(
time=timestamp(),
level="ERROR",
message="❌ TEST FAILED ❌\nNo trajectory directory found",
item_id=test_item_id
)
# Upload video if available
if video_path and os.path.exists(video_path):
try:
with open(video_path, "rb") as video_file:
client.log(
time=timestamp(),
level="INFO",
message="Screen recording of test execution",
item_id=test_item_id,
attachment={
"name": f"test_recording_{formatted_test_path}.mp4",
"data": video_file.read(),
"mime": "video/x-msvideo"
}
)
logger.info(f"Uploaded video for failed test: {video_path}")
except Exception as e:
logger.error(f"Error uploading video: {e}")
client.finish_test_item(
item_id=test_item_id,
end_time=timestamp(),
status="FAILED"
)
return
formatted_test_path = test_path.replace('\\', '/').replace('.txt', '').replace('/', '__')
# Determine final status
if force_stopped:
final_status = "FAILED"
status_message = "exceeded maximum turn limit (30 turns)"
else:
test_result = extract_test_result_from_trajectory(trajectory_dir)
if test_result is True:
final_status = "PASSED"
status_message = "completed successfully with positive result"
else:
final_status = "FAILED"
status_message = "no valid success result found"
# Create test item
test_item_id = client.start_test_item(
launch_id=launch_id,
name=formatted_test_path,
start_time=timestamp(),
item_type="TEST",
description=f"Test case from: {test_path}"
)
try:
turn_folders = [f for f in os.listdir(trajectory_dir)
if os.path.isdir(os.path.join(trajectory_dir, f)) and f.startswith("turn_")]
# Add clear status log
status_emoji = "" if final_status == "PASSED" else ""
client.log(
time=timestamp(),
level="INFO" if final_status == "PASSED" else "ERROR",
message=f"{status_emoji} TEST {final_status} {status_emoji}\nReason: {status_message}\nTotal turns: {len(turn_folders)}",
item_id=test_item_id
)
# Upload screen recording video first
if video_path and os.path.exists(video_path):
logger.info(f"Attempting to upload video: {video_path}")
logger.info(f"Video file size: {os.path.getsize(video_path)} bytes")
try:
with open(video_path, "rb") as video_file:
video_data = video_file.read()
logger.info(f"Read video data: {len(video_data)} bytes")
client.log(
time=timestamp(),
level="INFO",
message="🎥 Screen recording of test execution",
item_id=test_item_id,
attachment={
"name": f"test_recording_{formatted_test_path}.mp4",
"data": video_data,
"mime": "video/x-msvideo"
}
)
logger.info(f"Successfully uploaded screen recording: {video_path}")
except Exception as e:
logger.error(f"Error uploading screen recording: {e}")
client.log(
time=timestamp(),
level="WARNING",
message=f"Failed to upload screen recording: {str(e)}",
item_id=test_item_id
)
else:
logger.warning(f"Video upload skipped - video_path: {video_path}, exists: {os.path.exists(video_path) if video_path else 'N/A'}")
client.log(
time=timestamp(),
level="WARNING",
message="No screen recording available for this test",
item_id=test_item_id
)
# Upload all turn data with appropriate status
# If test failed, mark all turns as failed
force_fail_turns = (final_status == "FAILED")
for turn_folder in sorted(turn_folders):
turn_path = os.path.join(trajectory_dir, turn_folder)
upload_turn_folder(client, test_item_id, turn_path, turn_folder, force_fail=force_fail_turns)
# Finish with correct status
client.finish_test_item(
item_id=test_item_id,
end_time=timestamp(),
status=final_status
)
logger.info(f"Uploaded test results for {formatted_test_path}: {final_status}")
except Exception as e:
logger.error(f"Error uploading test results: {e}")
client.finish_test_item(
item_id=test_item_id,
end_time=timestamp(),
status="FAILED"
)

autoqa/requirements.txt Normal file

@@ -0,0 +1,18 @@
# Core dependencies
cua-computer[all]>=0.3.5
# cua-agent[all]>=0.3.0  # superseded by the pinned git build below (listing both would duplicate the requirement)
cua-agent @ git+https://github.com/menloresearch/cua.git@compute-agent-0.3.0-patch#subdirectory=libs/python/agent
# ReportPortal integration
reportportal-client>=5.6.5
# Screen recording and automation
opencv-python>=4.12.0
numpy>=2.2.6
PyAutoGUI>=0.9.54
# System utilities
psutil>=7.0.0
# Server component
cua-computer-server>=0.1.19

autoqa/screen_recorder.py Normal file

@@ -0,0 +1,84 @@
import cv2
import numpy as np
import pyautogui
import threading
import time
import logging
logger = logging.getLogger(__name__)
class ScreenRecorder:
def __init__(self, output_path, fps=10):
self.output_path = output_path
self.fps = fps
self.recording = False
self.writer = None
self.thread = None
def start_recording(self):
"""Start screen recording"""
if self.recording:
logger.warning("Recording already in progress")
return
self.recording = True
self.thread = threading.Thread(target=self._record_screen, daemon=True)
self.thread.start()
logger.info(f"Started screen recording: {self.output_path}")
def stop_recording(self):
"""Stop screen recording"""
if not self.recording:
logger.warning("No recording in progress")
return
self.recording = False
if self.thread:
self.thread.join(timeout=5)
if self.writer:
self.writer.release()
logger.info(f"Stopped screen recording: {self.output_path}")
def _record_screen(self):
"""Internal method to record screen"""
try:
# Get screen dimensions
screen_size = pyautogui.size()
# Try MP4 with H264 codec for better compatibility
fourcc = cv2.VideoWriter_fourcc(*'mp4v') # or 'H264'
output_path_mp4 = self.output_path
self.writer = cv2.VideoWriter(
output_path_mp4,
fourcc,
self.fps,
screen_size
)
while self.recording:
try:
# Capture screen
screenshot = pyautogui.screenshot()
# Convert PIL image to numpy array
frame = np.array(screenshot)
# Convert RGB to BGR (OpenCV uses BGR)
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
# Write frame
self.writer.write(frame)
# Control FPS
time.sleep(1.0 / self.fps)
except Exception as e:
logger.error(f"Error capturing frame: {e}")
break
except Exception as e:
logger.error(f"Error in screen recording: {e}")
finally:
if self.writer:
self.writer.release()

autoqa/scripts/README.md Normal file

@@ -0,0 +1,116 @@
# AutoQA Scripts
This directory contains the platform-specific scripts used by the AutoQA GitHub Actions workflow. They keep the workflow file cleaner and more maintainable by moving complex inline scripts into separate files.
## Directory Structure
```text
autoqa/scripts/
├── setup_permissions.sh # Setup executable permissions for all scripts
├── windows_cleanup.ps1 # Windows: Clean existing Jan installations
├── windows_download.ps1 # Windows: Download Jan app installer
├── windows_install.ps1 # Windows: Install Jan app
├── windows_post_cleanup.ps1 # Windows: Post-test cleanup
├── run_tests.ps1 # Windows: Run AutoQA tests
├── ubuntu_cleanup.sh # Ubuntu: Clean existing Jan installations
├── ubuntu_download.sh # Ubuntu: Download Jan app (.deb)
├── ubuntu_install.sh # Ubuntu: Install Jan app
├── ubuntu_post_cleanup.sh # Ubuntu: Post-test cleanup
├── macos_cleanup.sh # macOS: Clean existing Jan installations
├── macos_download.sh # macOS: Download Jan app (.dmg)
├── macos_install.sh # macOS: Install Jan app
├── macos_post_cleanup.sh # macOS: Post-test cleanup
├── run_tests.sh # Unix: Run AutoQA tests (Ubuntu/macOS)
├── README.md # This file
└── PERMISSIONS.md # Permission setup documentation
```
## Script Functions
### Windows Scripts (.ps1)
- **windows_cleanup.ps1**: Removes existing Jan installations and kills running processes
- **windows_download.ps1**: Downloads Jan installer with priority-based URL selection
- **windows_install.ps1**: Installs Jan app and sets environment variables
- **windows_post_cleanup.ps1**: Comprehensive cleanup after tests including uninstallation
- **run_tests.ps1**: Runs the AutoQA Python tests with proper arguments
### Ubuntu Scripts (.sh)
- **ubuntu_cleanup.sh**: Removes existing Jan installations and kills running processes
- **ubuntu_download.sh**: Downloads Jan .deb package with priority-based URL selection
- **ubuntu_install.sh**: Installs Jan .deb package and sets environment variables
- **ubuntu_post_cleanup.sh**: Comprehensive cleanup after tests including package removal
### macOS Scripts (.sh)
- **macos_cleanup.sh**: Removes existing Jan installations and kills running processes
- **macos_download.sh**: Downloads Jan .dmg package with priority-based URL selection
- **macos_install.sh**: Mounts DMG, extracts .app, and installs to Applications
- **macos_post_cleanup.sh**: Comprehensive cleanup after tests
### Common Scripts
- **setup_permissions.sh**: Automatically sets executable permissions for all shell scripts
- **run_tests.sh**: Platform-agnostic test runner for Unix-based systems (Ubuntu/macOS)
## Usage in GitHub Actions
These scripts are called from the `.github/workflows/autoqa-template.yml` workflow file:
```yaml
# Setup permissions first (Ubuntu/macOS)
- name: Setup script permissions
run: |
chmod +x autoqa/scripts/setup_permissions.sh
./autoqa/scripts/setup_permissions.sh
# Then use scripts without chmod
- name: Clean existing Jan installations
run: |
./autoqa/scripts/ubuntu_cleanup.sh
# Windows example (no chmod needed)
- name: Clean existing Jan installations
shell: powershell
run: |
.\autoqa\scripts\windows_cleanup.ps1
```
## Benefits
1. **Maintainability**: Complex scripts are in separate files, easier to read and modify
2. **Reusability**: Scripts can be reused across different workflows or locally
3. **Testing**: Scripts can be tested independently
4. **Version Control**: Better diff tracking for script changes
5. **Platform Consistency**: Similar functionality across platforms in separate files
## Development
When modifying these scripts:
1. Test them locally on the respective platforms
2. Ensure proper error handling and exit codes
3. Follow platform-specific best practices
4. Update this README if new scripts are added
## Script Parameters
### Windows Scripts
- Most scripts accept `-IsNightly` parameter to handle nightly vs stable builds
- Download script accepts multiple URL sources with priority ordering
### Unix Scripts
- Most scripts accept positional parameters for nightly flag and URLs
- Scripts use `$1`, `$2`, etc. for parameter access
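For example, the template workflow invokes the Ubuntu download script with six positional arguments: the workflow-input URL and nightly flag, the repository-variable URL and flag, and the defaults (the URL shown here is a placeholder):

```bash
./autoqa/scripts/ubuntu_download.sh \
  "https://example.com/Jan-nightly_amd64.deb" "true" \
  "" "" \
  "$DEFAULT_JAN_APP_URL" "$DEFAULT_IS_NIGHTLY"
```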
## Environment Variables
Scripts set these environment variables for subsequent workflow steps:
- `JAN_APP_URL`: The selected Jan app download URL
- `IS_NIGHTLY`: Boolean flag indicating if it's a nightly build
- `JAN_APP_PATH`: Path to the installed Jan executable
- `JAN_PROCESS_NAME`: Name of the Jan process for monitoring
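Each variable is exported by appending to `$GITHUB_ENV`, the same pattern the download and install scripts use:

```bash
echo "JAN_APP_PATH=$APP_PATH" >> $GITHUB_ENV
echo "IS_NIGHTLY=$IS_NIGHTLY" >> $GITHUB_ENV
```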

autoqa/scripts/macos_cleanup.sh Normal file

@@ -0,0 +1,34 @@
#!/bin/bash
# macOS cleanup script for Jan app
echo "Cleaning existing Jan installations..."
# Kill any running Jan processes (both regular and nightly)
pkill -f "Jan" || true
pkill -f "jan" || true
pkill -f "Jan-nightly" || true
pkill -f "jan-nightly" || true
# Remove Jan app directories
rm -rf /Applications/Jan.app
rm -rf /Applications/Jan-nightly.app
rm -rf ~/Applications/Jan.app
rm -rf ~/Applications/Jan-nightly.app
# Remove Jan data folders (both regular and nightly)
rm -rf ~/Library/Application\ Support/Jan
rm -rf ~/Library/Application\ Support/Jan-nightly
rm -rf ~/Library/Application\ Support/jan.ai.app
rm -rf ~/Library/Application\ Support/jan-nightly.ai.app
rm -rf ~/Library/Preferences/jan.*
rm -rf ~/Library/Preferences/jan-nightly.*
rm -rf ~/Library/Caches/jan.*
rm -rf ~/Library/Caches/jan-nightly.*
rm -rf ~/Library/Caches/jan.ai.app
rm -rf ~/Library/Caches/jan-nightly.ai.app
rm -rf ~/Library/WebKit/jan.ai.app
rm -rf ~/Library/WebKit/jan-nightly.ai.app
rm -rf ~/Library/Saved\ Application\ State/jan.ai.app
rm -rf ~/Library/Saved\ Application\ State/jan-nightly.ai.app
echo "Jan cleanup completed"

autoqa/scripts/macos_download.sh Normal file

@@ -0,0 +1,49 @@
#!/bin/bash
# macOS download script for Jan app
WORKFLOW_INPUT_URL="$1"
WORKFLOW_INPUT_IS_NIGHTLY="$2"
REPO_VARIABLE_URL="$3"
REPO_VARIABLE_IS_NIGHTLY="$4"
DEFAULT_URL="$5"
DEFAULT_IS_NIGHTLY="$6"
# Determine Jan app URL and nightly flag from multiple sources (priority order):
# 1. Workflow dispatch input (manual trigger)
# 2. Repository variable JAN_APP_URL
# 3. Default URL from env
JAN_APP_URL=""
IS_NIGHTLY="false"
if [ -n "$WORKFLOW_INPUT_URL" ]; then
JAN_APP_URL="$WORKFLOW_INPUT_URL"
IS_NIGHTLY="$WORKFLOW_INPUT_IS_NIGHTLY"
echo "Using Jan app URL from workflow input: $JAN_APP_URL"
echo "Is nightly build: $IS_NIGHTLY"
elif [ -n "$REPO_VARIABLE_URL" ]; then
JAN_APP_URL="$REPO_VARIABLE_URL"
IS_NIGHTLY="$REPO_VARIABLE_IS_NIGHTLY"
echo "Using Jan app URL from repository variable: $JAN_APP_URL"
echo "Is nightly build: $IS_NIGHTLY"
else
JAN_APP_URL="$DEFAULT_URL"
IS_NIGHTLY="$DEFAULT_IS_NIGHTLY"
echo "Using default Jan app URL: $JAN_APP_URL"
echo "Is nightly build: $IS_NIGHTLY"
fi
# Export for later steps
echo "JAN_APP_URL=$JAN_APP_URL" >> $GITHUB_ENV
echo "IS_NIGHTLY=$IS_NIGHTLY" >> $GITHUB_ENV
echo "Downloading Jan app from: $JAN_APP_URL"
curl -L -o "/tmp/jan-installer.dmg" "$JAN_APP_URL"
if [ ! -f "/tmp/jan-installer.dmg" ]; then
echo "❌ Failed to download Jan app"
exit 1
fi
echo "✅ Successfully downloaded Jan app"
ls -la "/tmp/jan-installer.dmg"

autoqa/scripts/macos_install.sh Normal file

@@ -0,0 +1,86 @@
#!/bin/bash
# macOS install script for Jan app
echo "Installing Jan app from DMG..."
# Mount the DMG
hdiutil attach "/tmp/jan-installer.dmg" -mountpoint "/tmp/jan-mount"
# Find the .app file in the mounted DMG
APP_FILE=$(find "/tmp/jan-mount" -name "*.app" -type d | head -1)
if [ -z "$APP_FILE" ]; then
echo "❌ No .app file found in DMG"
hdiutil detach "/tmp/jan-mount" || true
exit 1
fi
echo "Found app file: $APP_FILE"
# Copy to Applications directory
cp -R "$APP_FILE" /Applications/
# Unmount the DMG
hdiutil detach "/tmp/jan-mount"
# Determine app name and executable path
APP_NAME=$(basename "$APP_FILE")
echo "App name: $APP_NAME"
# First, check what's actually in the MacOS folder
echo "Contents of MacOS folder:"
ls -la "/Applications/$APP_NAME/Contents/MacOS/"
# Find all executable files in MacOS folder
echo "Looking for executable files..."
find "/Applications/$APP_NAME/Contents/MacOS/" -type f -perm +111 -ls
# Try to find the main executable - it's usually the one with the same name as the app (without .app)
APP_BASE_NAME=$(basename "$APP_NAME" .app)
POTENTIAL_EXECUTABLES=(
"/Applications/$APP_NAME/Contents/MacOS/$APP_BASE_NAME"
"/Applications/$APP_NAME/Contents/MacOS/Jan"
"/Applications/$APP_NAME/Contents/MacOS/Jan-nightly"
)
APP_PATH=""
for potential_exec in "${POTENTIAL_EXECUTABLES[@]}"; do
echo "Checking: $potential_exec"
if [ -f "$potential_exec" ] && [ -x "$potential_exec" ]; then
APP_PATH="$potential_exec"
echo "Found executable: $APP_PATH"
break
fi
done
# If still not found, get any executable file
if [ -z "$APP_PATH" ]; then
echo "No predefined executable found, searching for any executable..."
APP_PATH=$(find "/Applications/$APP_NAME/Contents/MacOS/" -type f -perm +111 | head -1)
fi
if [ -z "$APP_PATH" ]; then
echo "❌ No executable found in MacOS folder"
ls -la "/Applications/$APP_NAME/Contents/MacOS/"
exit 1
fi
PROCESS_NAME=$(basename "$APP_PATH")
echo "App installed at: /Applications/$APP_NAME"
echo "Executable path: $APP_PATH"
echo "Process name: $PROCESS_NAME"
# Export for test step
echo "JAN_APP_PATH=$APP_PATH" >> $GITHUB_ENV
echo "PROCESS_NAME=$PROCESS_NAME" >> $GITHUB_ENV
# Verify installation
if [ -f "$APP_PATH" ]; then
echo "✅ Jan app installed successfully"
ls -la "/Applications/$APP_NAME"
else
echo "❌ Jan app installation failed - executable not found"
exit 1
fi
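
For reference, the executable-discovery logic above can be expressed compactly in Python; a hedged equivalent (find_app_executable is an illustrative name, not part of this commit):

import os
from pathlib import Path

def find_app_executable(app_bundle: str) -> str:
    """Locate the main binary inside <bundle>/Contents/MacOS, preferring
    one named after the bundle, then falling back to any executable file."""
    macos_dir = Path(app_bundle) / "Contents" / "MacOS"
    base = Path(app_bundle).stem  # "Jan.app" -> "Jan"
    for cand in (macos_dir / base, macos_dir / "Jan", macos_dir / "Jan-nightly"):
        if cand.is_file() and os.access(cand, os.X_OK):
            return str(cand)
    for entry in macos_dir.iterdir():  # last resort: first executable found
        if entry.is_file() and os.access(entry, os.X_OK):
            return str(entry)
    raise FileNotFoundError(f"No executable found in {macos_dir}")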

View File

@ -0,0 +1,38 @@
#!/bin/bash
# macOS post-test cleanup script
echo "Cleaning up after tests..."
# Kill any running Jan processes (both regular and nightly)
pkill -f "Jan" || true
pkill -f "jan" || true
pkill -f "Jan-nightly" || true
pkill -f "jan-nightly" || true
# Remove Jan app directories
rm -rf /Applications/Jan.app
rm -rf /Applications/Jan-nightly.app
rm -rf ~/Applications/Jan.app
rm -rf ~/Applications/Jan-nightly.app
# Remove Jan data folders (both regular and nightly)
rm -rf ~/Library/Application\ Support/Jan
rm -rf ~/Library/Application\ Support/Jan-nightly
rm -rf ~/Library/Application\ Support/jan.ai.app
rm -rf ~/Library/Application\ Support/jan-nightly.ai.app
rm -rf ~/Library/Preferences/jan.*
rm -rf ~/Library/Preferences/jan-nightly.*
rm -rf ~/Library/Caches/jan.*
rm -rf ~/Library/Caches/jan-nightly.*
rm -rf ~/Library/Caches/jan.ai.app
rm -rf ~/Library/Caches/jan-nightly.ai.app
rm -rf ~/Library/WebKit/jan.ai.app
rm -rf ~/Library/WebKit/jan-nightly.ai.app
rm -rf ~/Library/Saved\ Application\ State/jan.ai.app
rm -rf ~/Library/Saved\ Application\ State/jan-nightly.ai.app
# Clean up downloaded installer
rm -f "/tmp/jan-installer.dmg"
rm -rf "/tmp/jan-mount"
echo "Cleanup completed"

View File

@ -0,0 +1,31 @@
#!/usr/bin/env pwsh
# Windows test runner script
param(
[string]$JanAppPath,
[string]$ProcessName,
[string]$RpToken
)
Write-Host "Starting Auto QA Tests..."
Write-Host "Jan app path: $JanAppPath"
Write-Host "Process name: $ProcessName"
Write-Host "Current working directory: $(Get-Location)"
Write-Host "Contents of current directory:"
Get-ChildItem
Write-Host "Contents of trajectories directory (if exists):"
if (Test-Path "trajectories") {
Get-ChildItem "trajectories"
} else {
Write-Host "trajectories directory not found"
}
# Run the main test with proper arguments
if ($JanAppPath -and $ProcessName) {
python main.py --enable-reportportal --rp-token "$RpToken" --jan-app-path "$JanAppPath" --jan-process-name "$ProcessName"
} elseif ($JanAppPath) {
python main.py --enable-reportportal --rp-token "$RpToken" --jan-app-path "$JanAppPath"
} else {
python main.py --enable-reportportal --rp-token "$RpToken"
}

View File

@ -0,0 +1,69 @@
#!/bin/bash
# Common test runner script
JAN_APP_PATH="$1"
PROCESS_NAME="$2"
RP_TOKEN="$3"
PLATFORM="$4"
echo "Starting Auto QA Tests..."
echo "Platform: $PLATFORM"
echo "Jan app path: $JAN_APP_PATH"
echo "Process name: $PROCESS_NAME"
# Platform-specific setup
if [ "$PLATFORM" = "ubuntu" ]; then
# Get the current display session
export DISPLAY=$(w -h | awk 'NR==1 {print $2}')
echo "Display ID: $DISPLAY"
# Verify display is working
if [ -z "$DISPLAY" ]; then
echo "No display session found, falling back to :0"
export DISPLAY=:0
fi
echo "Using display: $DISPLAY"
# Test display connection
xdpyinfo -display $DISPLAY >/dev/null 2>&1 || {
echo "Display $DISPLAY is not available"
exit 1
}
# Make Jan executable if needed
if [ -f "/usr/bin/Jan-nightly" ]; then
sudo chmod +x /usr/bin/Jan-nightly
fi
if [ -f "/usr/bin/Jan" ]; then
sudo chmod +x /usr/bin/Jan
fi
fi
# macOS specific setup
if [ "$PLATFORM" = "macos" ]; then
# Verify Jan app path
if [ ! -f "$JAN_APP_PATH" ]; then
echo "❌ Jan app not found at: $JAN_APP_PATH"
echo "Available files in /Applications:"
ls -la /Applications/ | grep -i jan || echo "No Jan apps found"
exit 1
fi
fi
# Change to autoqa directory to ensure correct working directory
cd "$(dirname "$0")/.."
echo "Current working directory: $(pwd)"
echo "Contents of current directory:"
ls -la
echo "Contents of trajectories directory (if exists):"
ls -la trajectories/ 2>/dev/null || echo "trajectories directory not found"
# Run the main test with proper arguments
if [ -n "$JAN_APP_PATH" ] && [ -n "$PROCESS_NAME" ]; then
python main.py --enable-reportportal --rp-token "$RP_TOKEN" --jan-app-path "$JAN_APP_PATH" --jan-process-name "$PROCESS_NAME"
elif [ -n "$JAN_APP_PATH" ]; then
python main.py --enable-reportportal --rp-token "$RP_TOKEN" --jan-app-path "$JAN_APP_PATH"
else
python main.py --enable-reportportal --rp-token "$RP_TOKEN"
fi

View File

@ -0,0 +1,15 @@
#!/bin/bash
# Setup script permissions for AutoQA scripts
echo "Setting up permissions for AutoQA scripts..."
# Get the directory where this script is located
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Make all shell scripts executable
chmod +x "$SCRIPT_DIR"/*.sh
echo "✅ All shell scripts are now executable:"
ls -la "$SCRIPT_DIR"/*.sh
echo "✅ Permission setup completed"

View File

@ -0,0 +1,22 @@
#!/bin/bash
# Ubuntu cleanup script for Jan app
echo "Cleaning existing Jan installations..."
# Remove Jan data folders (both regular and nightly)
rm -rf ~/.config/Jan
rm -rf ~/.config/Jan-nightly
rm -rf ~/.local/share/Jan
rm -rf ~/.local/share/Jan-nightly
rm -rf ~/.cache/jan
rm -rf ~/.cache/jan-nightly
rm -rf ~/.local/share/jan-nightly.ai.app
rm -rf ~/.local/share/jan.ai.app
# Kill any running Jan processes (both regular and nightly)
pkill -f "Jan" || true
pkill -f "jan" || true
pkill -f "Jan-nightly" || true
pkill -f "jan-nightly" || true
echo "Jan cleanup completed"

View File

@ -0,0 +1,57 @@
#!/bin/bash
# Ubuntu download script for Jan app
WORKFLOW_INPUT_URL="$1"
WORKFLOW_INPUT_IS_NIGHTLY="$2"
REPO_VARIABLE_URL="$3"
REPO_VARIABLE_IS_NIGHTLY="$4"
DEFAULT_URL="$5"
DEFAULT_IS_NIGHTLY="$6"
# Determine Jan app URL and nightly flag from multiple sources (priority order):
# 1. Workflow dispatch input (manual trigger)
# 2. Repository variable JAN_APP_URL_LINUX
# 3. Default URL from env
JAN_APP_URL=""
IS_NIGHTLY=false
if [ -n "$WORKFLOW_INPUT_URL" ]; then
JAN_APP_URL="$WORKFLOW_INPUT_URL"
IS_NIGHTLY="$WORKFLOW_INPUT_IS_NIGHTLY"
echo "Using Jan app URL from workflow input: $JAN_APP_URL"
echo "Is nightly build: $IS_NIGHTLY"
elif [ -n "$REPO_VARIABLE_URL" ]; then
JAN_APP_URL="$REPO_VARIABLE_URL"
IS_NIGHTLY="$REPO_VARIABLE_IS_NIGHTLY"
echo "Using Jan app URL from repository variable: $JAN_APP_URL"
echo "Is nightly build: $IS_NIGHTLY"
else
JAN_APP_URL="$DEFAULT_URL"
IS_NIGHTLY="$DEFAULT_IS_NIGHTLY"
echo "Using default Jan app URL: $JAN_APP_URL"
echo "Is nightly build: $IS_NIGHTLY"
fi
# Set environment variables for later steps
echo "JAN_APP_URL=$JAN_APP_URL" >> $GITHUB_ENV
echo "IS_NIGHTLY=$IS_NIGHTLY" >> $GITHUB_ENV
echo "Downloading Jan app from: $JAN_APP_URL"
DOWNLOAD_PATH="/tmp/jan-installer.deb"
# Download the package
if ! wget "$JAN_APP_URL" -O "$DOWNLOAD_PATH"; then
echo "Failed to download Jan app"
exit 1
fi
if [ -f "$DOWNLOAD_PATH" ]; then
FILE_SIZE=$(stat -c%s "$DOWNLOAD_PATH")
echo "Downloaded Jan app successfully. Size: $FILE_SIZE bytes"
echo "File saved to: $DOWNLOAD_PATH"
else
echo "Downloaded file not found"
exit 1
fi

View File

@ -0,0 +1,34 @@
#!/bin/bash
# Ubuntu install script for Jan app
IS_NIGHTLY="$1"
INSTALLER_PATH="/tmp/jan-installer.deb"
echo "Installing Jan app..."
echo "Is nightly build: $IS_NIGHTLY"
# Install the .deb package
sudo apt install "$INSTALLER_PATH" -y
sudo apt-get install -f -y
# Wait for installation to complete
sleep 10
# Verify installation based on nightly flag
if [ "$IS_NIGHTLY" = "true" ]; then
DEFAULT_JAN_PATH="/usr/bin/Jan-nightly"
PROCESS_NAME="Jan-nightly"
else
DEFAULT_JAN_PATH="/usr/bin/Jan"
PROCESS_NAME="Jan"
fi
if [ -f "$DEFAULT_JAN_PATH" ]; then
echo "Jan app installed successfully at: $DEFAULT_JAN_PATH"
echo "JAN_APP_PATH=$DEFAULT_JAN_PATH" >> $GITHUB_ENV
echo "JAN_PROCESS_NAME=$PROCESS_NAME" >> $GITHUB_ENV
else
echo "Jan app not found at expected location: $DEFAULT_JAN_PATH"
echo "Will auto-detect during test run"
fi

View File

@ -0,0 +1,44 @@
#!/bin/bash
# Ubuntu post-test cleanup script
IS_NIGHTLY="$1"
echo "Cleaning up after tests..."
# Kill any running Jan processes (both regular and nightly)
pkill -f "Jan" || true
pkill -f "jan" || true
pkill -f "Jan-nightly" || true
pkill -f "jan-nightly" || true
# Remove Jan data folders (both regular and nightly)
rm -rf ~/.config/Jan
rm -rf ~/.config/Jan-nightly
rm -rf ~/.local/share/Jan
rm -rf ~/.local/share/Jan-nightly
rm -rf ~/.cache/jan
rm -rf ~/.cache/jan-nightly
rm -rf ~/.local/share/jan-nightly.ai.app
rm -rf ~/.local/share/jan.ai.app
# Try to uninstall Jan app
if [ "$IS_NIGHTLY" = "true" ]; then
PACKAGE_NAME="jan-nightly"
else
PACKAGE_NAME="jan"
fi
echo "Attempting to uninstall package: $PACKAGE_NAME"
if dpkg -l | grep -q "$PACKAGE_NAME"; then
echo "Found package $PACKAGE_NAME, uninstalling..."
sudo dpkg -r "$PACKAGE_NAME" || true
sudo apt-get autoremove -y || true
else
echo "Package $PACKAGE_NAME not found in dpkg list"
fi
# Clean up downloaded installer
rm -f "/tmp/jan-installer.deb"
echo "Cleanup completed"

View File

@ -0,0 +1,50 @@
#!/usr/bin/env pwsh
# Windows cleanup script for Jan app
param(
[string]$IsNightly = "false"
)
Write-Host "Cleaning existing Jan installations..."
# Remove Jan data folders (both regular and nightly)
$janAppData = "$env:APPDATA\Jan"
$janNightlyAppData = "$env:APPDATA\Jan-nightly"
$janLocalAppData = "$env:LOCALAPPDATA\jan.ai.app"
$janNightlyLocalAppData = "$env:LOCALAPPDATA\jan-nightly.ai.app"
if (Test-Path $janAppData) {
Write-Host "Removing $janAppData"
Remove-Item -Path $janAppData -Recurse -Force -ErrorAction SilentlyContinue
}
if (Test-Path $janNightlyAppData) {
Write-Host "Removing $janNightlyAppData"
Remove-Item -Path $janNightlyAppData -Recurse -Force -ErrorAction SilentlyContinue
}
if (Test-Path $janLocalAppData) {
Write-Host "Removing $janLocalAppData"
Remove-Item -Path $janLocalAppData -Recurse -Force -ErrorAction SilentlyContinue
}
if (Test-Path $janNightlyLocalAppData) {
Write-Host "Removing $janNightlyLocalAppData"
Remove-Item -Path $janNightlyLocalAppData -Recurse -Force -ErrorAction SilentlyContinue
}
# Kill any running Jan processes (both regular and nightly)
Get-Process -Name "Jan" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
Get-Process -Name "jan" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
Get-Process -Name "Jan-nightly" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
Get-Process -Name "jan-nightly" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
# Remove Jan extensions folder
$janExtensionsPath = "$env:USERPROFILE\jan\extensions"
if (Test-Path $janExtensionsPath) {
Write-Host "Removing $janExtensionsPath"
Remove-Item -Path $janExtensionsPath -Recurse -Force -ErrorAction SilentlyContinue
}
Write-Host "Jan cleanup completed"

View File

@ -0,0 +1,63 @@
#!/usr/bin/env pwsh
# Windows download script for Jan app
param(
[string]$WorkflowInputUrl = "",
[string]$WorkflowInputIsNightly = "",
[string]$RepoVariableUrl = "",
[string]$RepoVariableIsNightly = "",
[string]$DefaultUrl = "",
[string]$DefaultIsNightly = ""
)
# Determine Jan app URL and nightly flag from multiple sources (priority order):
# 1. Workflow dispatch input (manual trigger)
# 2. Repository variable JAN_APP_URL
# 3. Default URL from env
$janAppUrl = ""
$isNightly = $false
if ($WorkflowInputUrl -ne "") {
$janAppUrl = $WorkflowInputUrl
$isNightly = [System.Convert]::ToBoolean($WorkflowInputIsNightly)
Write-Host "Using Jan app URL from workflow input: $janAppUrl"
Write-Host "Is nightly build: $isNightly"
}
elseif ($RepoVariableUrl -ne "") {
$janAppUrl = $RepoVariableUrl
$isNightly = [System.Convert]::ToBoolean($RepoVariableIsNightly)
Write-Host "Using Jan app URL from repository variable: $janAppUrl"
Write-Host "Is nightly build: $isNightly"
}
else {
$janAppUrl = $DefaultUrl
$isNightly = [System.Convert]::ToBoolean($DefaultIsNightly)
Write-Host "Using default Jan app URL: $janAppUrl"
Write-Host "Is nightly build: $isNightly"
}
# Set environment variables for later steps
Write-Output "JAN_APP_URL=$janAppUrl" >> $env:GITHUB_ENV
Write-Output "IS_NIGHTLY=$isNightly" >> $env:GITHUB_ENV
Write-Host "Downloading Jan app from: $janAppUrl"
$downloadPath = "$env:TEMP\jan-installer.exe"
try {
# Use wget for better performance
wget.exe "$janAppUrl" -O "$downloadPath"
if (Test-Path $downloadPath) {
$fileSize = (Get-Item $downloadPath).Length
Write-Host "Downloaded Jan app successfully. Size: $fileSize bytes"
Write-Host "File saved to: $downloadPath"
} else {
throw "Downloaded file not found"
}
}
catch {
Write-Error "Failed to download Jan app: $_"
exit 1
}

View File

@ -0,0 +1,43 @@
#!/usr/bin/env pwsh
# Windows install script for Jan app
param(
[string]$IsNightly = "false"
)
$installerPath = "$env:TEMP\jan-installer.exe"
$isNightly = [System.Convert]::ToBoolean($IsNightly)
Write-Host "Installing Jan app..."
Write-Host "Is nightly build: $isNightly"
# Try silent installation first
try {
Start-Process -FilePath $installerPath -ArgumentList "/S" -Wait -NoNewWindow
Write-Host "Jan app installed silently"
}
catch {
Write-Host "Silent installation failed, trying normal installation..."
Start-Process -FilePath $installerPath -Wait -NoNewWindow
}
# Wait a bit for installation to complete
Start-Sleep -Seconds 10
# Verify installation based on nightly flag
if ($isNightly) {
$defaultJanPath = "$env:LOCALAPPDATA\Programs\jan-nightly\Jan-nightly.exe"
$processName = "Jan-nightly.exe"
} else {
$defaultJanPath = "$env:LOCALAPPDATA\Programs\jan\Jan.exe"
$processName = "Jan.exe"
}
if (Test-Path $defaultJanPath) {
Write-Host "Jan app installed successfully at: $defaultJanPath"
Write-Output "JAN_APP_PATH=$defaultJanPath" >> $env:GITHUB_ENV
Write-Output "JAN_PROCESS_NAME=$processName" >> $env:GITHUB_ENV
} else {
Write-Warning "Jan app not found at expected location: $defaultJanPath"
Write-Host "Will auto-detect during test run"
}

View File

@ -0,0 +1,102 @@
#!/usr/bin/env pwsh
# Windows post-test cleanup script
param(
[string]$IsNightly = "false"
)
Write-Host "Cleaning up after tests..."
# Kill any running Jan processes (both regular and nightly)
Get-Process -Name "Jan" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
Get-Process -Name "jan" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
Get-Process -Name "Jan-nightly" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
Get-Process -Name "jan-nightly" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
# Remove Jan data folders (both regular and nightly)
$janAppData = "$env:APPDATA\Jan"
$janNightlyAppData = "$env:APPDATA\Jan-nightly"
$janLocalAppData = "$env:LOCALAPPDATA\jan.ai.app"
$janNightlyLocalAppData = "$env:LOCALAPPDATA\jan-nightly.ai.app"
$janProgramsPath = "$env:LOCALAPPDATA\Programs\Jan"
$janNightlyProgramsPath = "$env:LOCALAPPDATA\Programs\Jan-nightly"
if (Test-Path $janAppData) {
Write-Host "Removing $janAppData"
Remove-Item -Path $janAppData -Recurse -Force -ErrorAction SilentlyContinue
}
if (Test-Path $janNightlyAppData) {
Write-Host "Removing $janNightlyAppData"
Remove-Item -Path $janNightlyAppData -Recurse -Force -ErrorAction SilentlyContinue
}
if (Test-Path $janLocalAppData) {
Write-Host "Removing $janLocalAppData"
Remove-Item -Path $janLocalAppData -Recurse -Force -ErrorAction SilentlyContinue
}
if (Test-Path $janNightlyLocalAppData) {
Write-Host "Removing $janNightlyLocalAppData"
Remove-Item -Path $janNightlyLocalAppData -Recurse -Force -ErrorAction SilentlyContinue
}
if (Test-Path $janProgramsPath) {
Write-Host "Removing $janProgramsPath"
Remove-Item -Path $janProgramsPath -Recurse -Force -ErrorAction SilentlyContinue
}
if (Test-Path $janNightlyProgramsPath) {
Write-Host "Removing $janNightlyProgramsPath"
Remove-Item -Path $janNightlyProgramsPath -Recurse -Force -ErrorAction SilentlyContinue
}
# Remove Jan extensions folder
$janExtensionsPath = "$env:USERPROFILE\jan\extensions"
if (Test-Path $janExtensionsPath) {
Write-Host "Removing $janExtensionsPath"
Remove-Item -Path $janExtensionsPath -Recurse -Force -ErrorAction SilentlyContinue
}
# Try to uninstall Jan app silently
try {
$isNightly = [System.Convert]::ToBoolean($IsNightly)
# Determine uninstaller path based on nightly flag
if ($isNightly) {
$uninstallerPath = "$env:LOCALAPPDATA\Programs\jan-nightly\uninstall.exe"
$installPath = "$env:LOCALAPPDATA\Programs\jan-nightly"
} else {
$uninstallerPath = "$env:LOCALAPPDATA\Programs\jan\uninstall.exe"
$installPath = "$env:LOCALAPPDATA\Programs\jan"
}
Write-Host "Looking for uninstaller at: $uninstallerPath"
if (Test-Path $uninstallerPath) {
Write-Host "Found uninstaller, attempting silent uninstall..."
Start-Process -FilePath $uninstallerPath -ArgumentList "/S" -Wait -NoNewWindow -ErrorAction SilentlyContinue
Write-Host "Uninstall completed"
} else {
Write-Host "No uninstaller found, attempting manual cleanup..."
if (Test-Path $installPath) {
Write-Host "Removing installation directory: $installPath"
Remove-Item -Path $installPath -Recurse -Force -ErrorAction SilentlyContinue
}
}
Write-Host "Jan app cleanup completed"
}
catch {
Write-Warning "Failed to uninstall Jan app cleanly: $_"
Write-Host "Manual cleanup may be required"
}
# Clean up downloaded installer
$installerPath = "$env:TEMP\jan-installer.exe"
if (Test-Path $installerPath) {
Remove-Item -Path $installerPath -Force -ErrorAction SilentlyContinue
}
Write-Host "Cleanup completed"

319
autoqa/test_runner.py Normal file
View File

@ -0,0 +1,319 @@
import os
import asyncio
import threading
import time
import logging
from datetime import datetime
from pathlib import Path
# from computer import Computer
from agent import ComputerAgent, LLM
from utils import is_jan_running, force_close_jan, start_jan_app, get_latest_trajectory_folder
from screen_recorder import ScreenRecorder
from reportportal_handler import upload_test_results_to_rp
from reportportal_client.helpers import timestamp
logger = logging.getLogger(__name__)
async def run_single_test_with_timeout(computer, test_data, rp_client, launch_id, max_turns=30,
jan_app_path=None, jan_process_name="Jan.exe", agent_config=None,
enable_reportportal=False):
"""
Run a single test case with turn count monitoring, forced stop, and screen recording
Returns dict with test result: {"success": bool, "status": str, "message": str}
"""
path = test_data['path']
prompt = test_data['prompt']
# Default agent config if not provided
if agent_config is None:
agent_config = {
"loop": "uitars",
"model_provider": "oaicompat",
"model_name": "ByteDance-Seed/UI-TARS-1.5-7B",
"model_base_url": "http://10.200.108.58:1234/v1"
}
# Create trajectory_dir from path (remove .txt extension)
trajectory_name = str(Path(path).with_suffix(''))
trajectory_base_dir = os.path.abspath(f"trajectories/{trajectory_name.replace(os.sep, '/')}")
# Ensure trajectories directory exists
os.makedirs(os.path.dirname(trajectory_base_dir), exist_ok=True)
# Create recordings directory
recordings_dir = "recordings"
os.makedirs(recordings_dir, exist_ok=True)
# Create video filename
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
safe_test_name = trajectory_name.replace('/', '_').replace('\\', '_')
video_filename = f"{safe_test_name}_{current_time}.mp4"
video_path = os.path.abspath(os.path.join(recordings_dir, video_filename))
# Initialize result tracking
test_result_data = {
"success": False,
"status": "UNKNOWN",
"message": "Test execution incomplete",
"trajectory_dir": None,
"video_path": video_path
}
logger.info(f"Starting test: {path}")
logger.info(f"Current working directory: {os.getcwd()}")
logger.info(f"Trajectory base directory: {trajectory_base_dir}")
logger.info(f"Screen recording will be saved to: {video_path}")
logger.info(f"Using model: {agent_config['model_name']} from {agent_config['model_base_url']}")
logger.info(f"ReportPortal upload: {'ENABLED' if enable_reportportal else 'DISABLED'}")
trajectory_dir = None
agent_task = None
monitor_stop_event = threading.Event()
force_stopped_due_to_turns = False # Track if test was force stopped
# Initialize screen recorder
recorder = ScreenRecorder(video_path, fps=10)
try:
# Step 1: Check and force close Jan app if running
if is_jan_running(jan_process_name):
logger.info("Jan application is running, force closing...")
force_close_jan(jan_process_name)
# Step 2: Start Jan app in maximized mode
if jan_app_path:
start_jan_app(jan_app_path)
else:
start_jan_app() # Use default path
# Step 3: Start screen recording
recorder.start_recording()
# Step 4: Create agent for this test using config
agent = ComputerAgent(
computer=computer,
loop=agent_config["loop"],
model=LLM(
provider=agent_config["model_provider"],
name=agent_config["model_name"],
provider_base_url=agent_config["model_base_url"]
),
trajectory_dir=trajectory_base_dir
)
# Step 5: Start monitoring thread
def monitor_thread():
nonlocal force_stopped_due_to_turns
while not monitor_stop_event.is_set():
try:
if os.path.exists(trajectory_base_dir):
folders = [f for f in os.listdir(trajectory_base_dir)
if os.path.isdir(os.path.join(trajectory_base_dir, f))]
if folders:
latest_folder = sorted(folders)[-1]
latest_folder_path = os.path.join(trajectory_base_dir, latest_folder)
if os.path.exists(latest_folder_path):
turn_folders = [f for f in os.listdir(latest_folder_path)
if os.path.isdir(os.path.join(latest_folder_path, f)) and f.startswith("turn_")]
turn_count = len(turn_folders)
logger.info(f"Current turn count: {turn_count}")
if turn_count >= max_turns:
logger.warning(f"Turn count exceeded {max_turns} for test {path}, forcing stop")
force_stopped_due_to_turns = True # Mark as force stopped
# Cancel the agent task
if agent_task and not agent_task.done():
agent_task.cancel()
monitor_stop_event.set()
return
# Poll every 5 seconds; wait() returns True as soon as the stop event is set
if monitor_stop_event.wait(5):
    break
except Exception as e:
logger.error(f"Error in monitor thread: {e}")
time.sleep(5)
# Start monitoring in background thread
monitor_thread_obj = threading.Thread(target=monitor_thread, daemon=True)
monitor_thread_obj.start()
# Step 6: Run the test with prompt
logger.info(f"Running test case: {path}")
try:
# Create the agent task
async def run_agent():
async for result in agent.run(prompt):
if monitor_stop_event.is_set():
logger.warning(f"Test {path} stopped due to turn limit")
break
logger.info(f"Test result for {path}: {result}")
print(result)
agent_task = asyncio.create_task(run_agent())
# Wait for agent task to complete or timeout
try:
await asyncio.wait_for(agent_task, timeout=600) # 10 minute timeout as backup
if not monitor_stop_event.is_set():
logger.info(f"Successfully completed test execution: {path}")
else:
logger.warning(f"Test {path} was stopped due to turn limit")
except asyncio.TimeoutError:
logger.warning(f"Test {path} timed out after 10 minutes")
agent_task.cancel()
except asyncio.CancelledError:
logger.warning(f"Test {path} was cancelled due to turn limit")
finally:
# Stop monitoring
monitor_stop_event.set()
except Exception as e:
logger.error(f"Error running test {path}: {e}")
monitor_stop_event.set()
# Update result data for exception case
test_result_data.update({
"success": False,
"status": "ERROR",
"message": f"Test execution failed with exception: {str(e)}",
"trajectory_dir": None
})
finally:
# Step 7: Stop screen recording
try:
recorder.stop_recording()
logger.info(f"Screen recording saved to: {video_path}")
except Exception as e:
logger.error(f"Error stopping screen recording: {e}")
# Step 8: Upload results to ReportPortal only if enabled
if enable_reportportal and rp_client and launch_id:
# Get trajectory folder first
trajectory_dir = get_latest_trajectory_folder(trajectory_base_dir)
try:
if trajectory_dir:
logger.info(f"Uploading results to ReportPortal for: {path}")
logger.info(f"Video path for upload: {video_path}")
logger.info(f"Video exists: {os.path.exists(video_path)}")
if os.path.exists(video_path):
logger.info(f"Video file size: {os.path.getsize(video_path)} bytes")
upload_test_results_to_rp(rp_client, launch_id, path, trajectory_dir, force_stopped_due_to_turns, video_path)
else:
logger.warning(f"Test completed but no trajectory found for: {path}")
# Handle case where test completed but no trajectory found
formatted_test_path = path.replace('\\', '/').replace('.txt', '').replace('/', '__')
test_item_id = rp_client.start_test_item(
launch_id=launch_id,
name=formatted_test_path,
start_time=timestamp(),
item_type="TEST"
)
rp_client.log(
time=timestamp(),
level="ERROR",
message="Test execution completed but no trajectory data found",
item_id=test_item_id
)
# Still upload video for failed test
if video_path and os.path.exists(video_path):
try:
with open(video_path, "rb") as video_file:
rp_client.log(
time=timestamp(),
level="INFO",
message="🎥 Screen recording of failed test",
item_id=test_item_id,
attachment={
"name": f"failed_test_recording_{formatted_test_path}.mp4",
"data": video_file.read(),
"mime": "video/x-msvideo"
}
)
except Exception as e:
logger.error(f"Error uploading video for failed test: {e}")
rp_client.finish_test_item(
item_id=test_item_id,
end_time=timestamp(),
status="FAILED"
)
except Exception as upload_error:
logger.error(f"Error uploading results for {path}: {upload_error}")
else:
# For non-ReportPortal mode, still get trajectory for final results
trajectory_dir = get_latest_trajectory_folder(trajectory_base_dir)
# Always process results for consistency (both RP and local mode)
# trajectory_dir is already set above, no need to call get_latest_trajectory_folder again
if trajectory_dir:
# Extract test result for processing
from reportportal_handler import extract_test_result_from_trajectory
if force_stopped_due_to_turns:
final_status = "FAILED"
status_message = "exceeded maximum turn limit ({} turns)".format(max_turns)
test_result_data.update({
"success": False,
"status": final_status,
"message": status_message,
"trajectory_dir": trajectory_dir
})
else:
test_result = extract_test_result_from_trajectory(trajectory_dir)
if test_result is True:
final_status = "PASSED"
status_message = "completed successfully with positive result"
test_result_data.update({
"success": True,
"status": final_status,
"message": status_message,
"trajectory_dir": trajectory_dir
})
else:
final_status = "FAILED"
status_message = "no valid success result found"
test_result_data.update({
"success": False,
"status": final_status,
"message": status_message,
"trajectory_dir": trajectory_dir
})
if not enable_reportportal:
# Local development mode - log results
logger.info(f"🏠 LOCAL RESULT: {path} - {final_status} ({status_message})")
logger.info(f"📹 Video saved: {video_path}")
logger.info(f"📁 Trajectory: {trajectory_dir}")
else:
final_status = "FAILED"
status_message = "no trajectory found"
test_result_data.update({
"success": False,
"status": final_status,
"message": status_message,
"trajectory_dir": None
})
if not enable_reportportal:
logger.warning(f"🏠 LOCAL RESULT: {path} - {final_status} ({status_message})")
# Step 9: Always force close Jan app after test completion
logger.info(f"Cleaning up after test: {path}")
force_close_jan(jan_process_name)
# Return test result
return test_result_data
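
A minimal driver sketch for run_single_test_with_timeout, assuming the autoqa modules are importable and ReportPortal is disabled; the Computer initialization mirrors the commented-out import above and is illustrative only:

import asyncio
from computer import Computer  # cua Computer SDK; real setup may differ
from utils import scan_test_files
from test_runner import run_single_test_with_timeout

async def main():
    computer = Computer()  # illustrative; actual configuration not shown here
    for test in scan_test_files("tests"):
        result = await run_single_test_with_timeout(
            computer=computer,
            test_data=test,
            rp_client=None,   # ReportPortal disabled
            launch_id=None,
            max_turns=30,
            enable_reportportal=False,
        )
        print(test["path"], "->", result["status"], "-", result["message"])

if __name__ == "__main__":
    asyncio.run(main())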

View File

@ -0,0 +1,15 @@
prompt = """
You are going to test the Jan application by downloading and chatting with a model (qwen2.5).
Step-by-step instructions:
1. Given the Jan application is already opened.
2. In the **bottom-left corner**, click the **“Hub”** menu item.
3. Scroll through the model list or use the search bar to find **qwen2.5**.
4. Click **“Use”** on the qwen2.5 model.
5. Wait for the model to finish downloading and become ready.
6. Once redirected to the chat screen, type any message into the input box (e.g. `Hello qwen2.5`).
7. Press **Enter** to send the message.
8. Wait for the model's response.
If the model responds correctly, return: {"result": True}, otherwise return: {"result": False}.
"""

343
autoqa/utils.py Normal file
View File

@ -0,0 +1,343 @@
import os
import logging
import subprocess
import psutil
import time
import pyautogui
import platform
from pathlib import Path
logger = logging.getLogger(__name__)
# Cross-platform window management
IS_LINUX = platform.system() == "Linux"
IS_WINDOWS = platform.system() == "Windows"
IS_MACOS = platform.system() == "Darwin"
if IS_WINDOWS:
try:
import pygetwindow as gw
except ImportError:
gw = None
logger.warning("pygetwindow not available on this system")
def is_jan_running(jan_process_name="Jan.exe"):
"""
Check if Jan application is currently running
"""
for proc in psutil.process_iter(['pid', 'name']):
try:
if proc.info['name'] and jan_process_name.lower() in proc.info['name'].lower():
return True
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
pass
return False
def force_close_jan(jan_process_name="Jan.exe"):
"""
Force close Jan application if it's running
"""
logger.info("Checking for running Jan processes...")
closed_any = False
for proc in psutil.process_iter(['pid', 'name']):
try:
if proc.info['name'] and jan_process_name.lower() in proc.info['name'].lower():
logger.info(f"Force closing Jan process (PID: {proc.info['pid']})")
proc.kill()
closed_any = True
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
pass
if closed_any:
logger.info("Waiting for Jan processes to terminate...")
time.sleep(3) # Wait for processes to fully terminate
else:
logger.info("No Jan processes found running")
def find_jan_window_linux():
"""
Find Jan window on Linux using wmctrl
"""
try:
result = subprocess.run(['wmctrl', '-l'], capture_output=True, text=True, timeout=10)
if result.returncode == 0:
for line in result.stdout.split('\n'):
if 'jan' in line.lower() or 'Jan' in line:
# Extract window ID (first column)
window_id = line.split()[0]
logger.info(f"Found Jan window with ID: {window_id}")
return window_id
except (subprocess.TimeoutExpired, FileNotFoundError, subprocess.SubprocessError) as e:
logger.warning(f"wmctrl command failed: {e}")
return None
def maximize_jan_window_linux():
"""
Maximize Jan window on Linux using wmctrl
"""
window_id = find_jan_window_linux()
if window_id:
try:
# Maximize window using wmctrl
subprocess.run(['wmctrl', '-i', '-r', window_id, '-b', 'add,maximized_vert,maximized_horz'],
timeout=5)
logger.info("Jan window maximized using wmctrl")
return True
except (subprocess.TimeoutExpired, subprocess.SubprocessError) as e:
logger.warning(f"Failed to maximize with wmctrl: {e}")
# Fallback: Try xdotool
try:
result = subprocess.run(['xdotool', 'search', '--name', 'Jan'],
capture_output=True, text=True, timeout=5)
if result.returncode == 0 and result.stdout.strip():
window_id = result.stdout.strip().split('\n')[0]
subprocess.run(['xdotool', 'windowactivate', window_id], timeout=5)
subprocess.run(['xdotool', 'key', 'alt+F10'], timeout=5) # Maximize shortcut
logger.info("Jan window maximized using xdotool")
return True
except (subprocess.TimeoutExpired, FileNotFoundError, subprocess.SubprocessError) as e:
logger.warning(f"xdotool command failed: {e}")
return False
def find_jan_window_macos():
"""
Find Jan window on macOS using AppleScript
"""
try:
# AppleScript to find Jan window
script = '''
tell application "System Events"
set janApps to (every process whose name contains "Jan")
if length of janApps > 0 then
return name of first item of janApps
else
return ""
end if
end tell
'''
result = subprocess.run(['osascript', '-e', script],
capture_output=True, text=True, timeout=10)
if result.returncode == 0 and result.stdout.strip():
app_name = result.stdout.strip()
logger.info(f"Found Jan app: {app_name}")
return app_name
except (subprocess.TimeoutExpired, FileNotFoundError, subprocess.SubprocessError) as e:
logger.warning(f"AppleScript command failed: {e}")
return None
def maximize_jan_window_macos():
"""
Maximize Jan window on macOS using AppleScript
"""
app_name = find_jan_window_macos()
if app_name:
try:
# AppleScript to maximize window
script = f'''
tell application "System Events"
tell process "{app_name}"
set frontmost to true
tell window 1
set value of attribute "AXFullScreen" to true
end tell
end tell
end tell
'''
result = subprocess.run(['osascript', '-e', script], timeout=10)
if result.returncode == 0:
logger.info("Jan window maximized using AppleScript")
return True
except (subprocess.TimeoutExpired, subprocess.SubprocessError) as e:
logger.warning(f"Failed to maximize with AppleScript: {e}")
# Fallback: try Cmd+Ctrl+F (the macOS full-screen shortcut)
try:
logger.info("Trying Cmd+Ctrl+F hotkey to maximize")
pyautogui.hotkey('cmd', 'ctrl', 'f')
time.sleep(1)
logger.info("Attempted to maximize using Cmd+Ctrl+F")
return True
except Exception as e:
logger.warning(f"Hotkey maximize failed: {e}")
return False
def maximize_jan_window():
"""
Find and maximize Jan window (cross-platform)
"""
try:
# Wait a bit for window to appear
time.sleep(2)
if IS_LINUX:
return maximize_jan_window_linux()
elif IS_MACOS:
return maximize_jan_window_macos()
elif IS_WINDOWS and gw:
# Method 1: Try to find window by title containing "Jan"
windows = gw.getWindowsWithTitle("Jan")
if windows:
jan_window = windows[0]
logger.info(f"Found Jan window: {jan_window.title}")
jan_window.maximize()
logger.info("Jan window maximized using pygetwindow")
return True
# Fallback when the platform-specific path did not return a result
# Method 2: Try Alt+Space then X (window-menu maximize shortcut, mainly Windows)
logger.info("Trying Alt+Space+X hotkey to maximize")
pyautogui.hotkey('alt', 'space')
time.sleep(0.5)
pyautogui.press('x')
logger.info("Attempted to maximize using Alt+Space+X")
return True
except Exception as e:
logger.warning(f"Could not maximize Jan window: {e}")
# Method 3: Platform-specific fallback
try:
if IS_WINDOWS:
logger.info("Trying Windows+Up arrow to maximize")
pyautogui.hotkey('win', 'up')
elif IS_LINUX:
logger.info("Trying Alt+F10 to maximize")
pyautogui.hotkey('alt', 'F10')
elif IS_MACOS:
logger.info("Trying macOS specific maximize")
pyautogui.hotkey('cmd', 'tab') # Switch to Jan if it's running
time.sleep(0.5)
return True
except Exception as e2:
logger.warning(f"All maximize methods failed: {e2}")
return False
def start_jan_app(jan_app_path=None):
"""
Start Jan application in maximized window (cross-platform)
"""
# Set default path based on platform
if jan_app_path is None:
if IS_WINDOWS:
jan_app_path = os.path.expanduser(r"~\AppData\Local\Programs\jan\Jan.exe")
elif IS_LINUX:
jan_app_path = "/usr/bin/Jan" # or "/usr/bin/Jan" for regular
elif IS_MACOS:
jan_app_path = "/Applications/Jan.app/Contents/MacOS/Jan" # Default macOS path
else:
raise NotImplementedError(f"Platform {platform.system()} not supported")
logger.info(f"Starting Jan application from: {jan_app_path}")
if not os.path.exists(jan_app_path):
logger.error(f"Jan executable not found at: {jan_app_path}")
raise FileNotFoundError(f"Jan app not found at {jan_app_path}")
try:
# Start the Jan application
if IS_WINDOWS:
subprocess.Popen([jan_app_path], shell=True)
elif IS_LINUX:
# On Linux, start with DISPLAY environment variable
env = os.environ.copy()
subprocess.Popen([jan_app_path], env=env)
elif IS_MACOS:
# On macOS, use 'open' command to launch .app bundle properly
if jan_app_path.endswith('.app/Contents/MacOS/Jan'):
# Use the .app bundle path instead
app_bundle = jan_app_path.replace('/Contents/MacOS/Jan', '')
subprocess.Popen(['open', app_bundle])
elif jan_app_path.endswith('.app'):
# Direct .app bundle
subprocess.Popen(['open', jan_app_path])
elif '/Contents/MacOS/' in jan_app_path:
# Extract app bundle from full executable path
app_bundle = jan_app_path.split('/Contents/MacOS/')[0]
subprocess.Popen(['open', app_bundle])
else:
# Fallback: try to execute directly
subprocess.Popen([jan_app_path])
else:
raise NotImplementedError(f"Platform {platform.system()} not supported")
logger.info("Jan application started")
# Wait for app to fully load
logger.info("Waiting for Jan application to initialize...")
time.sleep(5)
# Try to maximize the window
if maximize_jan_window():
logger.info("Jan application maximized successfully")
else:
logger.warning("Could not maximize Jan application window")
# Wait a bit more after maximizing
time.sleep(10)
logger.info("Jan application should be ready")
time.sleep(10) # Additional wait to ensure everything is ready
except Exception as e:
logger.error(f"Error starting Jan application: {e}")
raise
def scan_test_files(tests_dir="tests"):
"""
Scan tests folder and find all .txt files
Returns list with format [{'path': 'relative_path', 'prompt': 'file_content'}]
"""
test_files = []
tests_path = Path(tests_dir)
if not tests_path.exists():
logger.error(f"Tests directory {tests_dir} does not exist!")
return test_files
# Scan all .txt files in folder and subfolders
for txt_file in tests_path.rglob("*.txt"):
try:
# Read file content
with open(txt_file, 'r', encoding='utf-8') as f:
content = f.read().strip()
# Get relative path
relative_path = txt_file.relative_to(tests_path)
test_files.append({
'path': str(relative_path),
'prompt': content
})
logger.info(f"Found test file: {relative_path}")
except Exception as e:
logger.error(f"Error reading file {txt_file}: {e}")
return test_files
def get_latest_trajectory_folder(trajectory_base_path):
"""
Get the latest created folder in trajectory base path
"""
if not os.path.exists(trajectory_base_path):
logger.warning(f"Trajectory base path not found: {trajectory_base_path}")
return None
# Get all folders and sort by creation time (latest first)
folders = [f for f in os.listdir(trajectory_base_path)
if os.path.isdir(os.path.join(trajectory_base_path, f))]
if not folders:
logger.warning(f"No trajectory folders found in: {trajectory_base_path}")
return None
# Sort by folder name (assuming timestamp format like 20250715_100443)
folders.sort(reverse=True)
latest_folder = folders[0]
full_path = os.path.join(trajectory_base_path, latest_folder)
logger.info(f"Found latest trajectory folder: {full_path}")
return full_path
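
A quick usage sketch for the two helpers above (the trajectory path is illustrative):

from utils import scan_test_files, get_latest_trajectory_folder

for test in scan_test_files("tests"):
    print("discovered:", test["path"])

latest = get_latest_trajectory_folder("trajectories/sample_test")
print("latest run:", latest or "none yet")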