Compare commits

...

30 Commits

Author SHA1 Message Date
Minh141120
4d21d78755 tests: add appearance font size test 2025-09-04 10:18:43 +07:00
Minh141120
ca930d0f90 chore: update qa checklist 2025-09-04 09:08:31 +07:00
Minh141120
7e41e1b0b6 test: add testcases for light and dark theme on appearance 2025-09-04 09:06:15 +07:00
Minh141120
a1007b51b6 test: add testcase for disable some model providers 2025-09-03 17:33:53 +07:00
Minh141120
73c55bcd37 test: add testcase change theme/appearance 2025-09-03 15:58:47 +07:00
Minh141120
30237256f8 test: update qa checklist to include create 2 custom assistants 2025-09-03 15:08:20 +07:00
Minh141120
30e878ab7f chore: comment out batch migration testcase 2025-09-03 15:07:16 +07:00
Minh141120
f909345259 chore: update migration testcase 2025-09-03 15:06:41 +07:00
Minh141120
f45d283e88 test: add migration modify http proxy test case 2025-09-03 14:53:17 +07:00
Minh141120
af5afc00f6 test: add migration modify local api server test case 2025-09-03 13:45:38 +07:00
Minh141120
703395ae10 feat: add recommended backend testcase 2025-08-22 12:47:02 +07:00
Minh141120
054f64bd54 feat: add download model on model card and download model on show variants testcases 2025-08-22 11:46:25 +07:00
Minh141120
4bac3f9838 chore: update system dependencies for mac 2025-08-21 20:13:32 +07:00
Minh141120
1ae3201a69 chore: update libwebkit for ubuntu 2025-08-21 20:09:04 +07:00
Minh141120
274358f84a chore: update default url for ubuntu and macos 2025-08-21 16:58:21 +07:00
Minh141120
0e72bfd117 chore: temporarily disable reportportal 2025-08-21 16:28:55 +07:00
Minh141120
8bbc0009f6 chore: update default installer for windows 2025-08-21 16:20:29 +07:00
Minh141120
44be3e8b92 feat: update args migration test case 2025-08-21 16:19:37 +07:00
Minh141120
4159d11fd4 feat: add autoqa migration workflow 2025-08-21 16:11:43 +07:00
Minh141120
82eb76a564 feat: add local api server log testcase and download model with url testcase 2025-08-21 15:19:32 +07:00
Minh141120
dab2941630 feat: add 2 new testcases 2025-08-21 13:08:08 +07:00
Minh141120
a8837f9d76 chore: update custom runs args 2025-08-20 21:26:22 +07:00
Minh141120
be547224fb chore: update default value for nightly 2025-08-20 21:05:21 +07:00
Minh141120
6021e0309f ci: add autoqa reliability macos and linux jobs 2025-08-20 21:02:41 +07:00
Minh141120
00b03ad4e3 feat: add autoqa reliability ci manual 2025-08-20 18:14:10 +07:00
Minh141120
0225bb6b1e feat: add prompt evaluation script for develop and deploy 2025-08-20 17:30:29 +07:00
Minh141120
b6813f1c7a feat: added testcase and update qa-checklist 2025-08-19 21:59:28 +07:00
Minh141120
3d764a92d3 chore: add migration utils 2025-08-15 12:06:52 +07:00
Minh141120
a74ff32443 chore: add import migration 2025-08-15 12:05:46 +07:00
Minh141120
8ca0e98e57 feat: add migration testing [WIP] 2025-08-15 09:57:44 +07:00
53 changed files with 6589 additions and 110 deletions

368
.github/workflows/autoqa-migration.yml vendored Normal file

@@ -0,0 +1,368 @@
name: AutoQA Migration (Manual)
on:
workflow_dispatch:
inputs:
is_nightly:
description: 'Is the app a nightly build?'
required: true
type: boolean
default: false
old_windows_installer:
description: 'Windows OLD installer URL or path (.exe)'
required: true
type: string
default: 'https://catalog.jan.ai/windows/Jan_0.6.7_x64-setup.exe'
new_windows_installer:
description: 'Windows NEW installer URL or path (.exe)'
required: true
type: string
default: 'https://catalog.jan.ai/windows/Jan_0.6.8_x64-setup.exe'
old_ubuntu_installer:
description: 'Ubuntu OLD installer URL or path (.deb)'
required: false
type: string
default: 'https://catalog.jan.ai/linux/Jan_0.6.7_amd64.deb'
new_ubuntu_installer:
description: 'Ubuntu NEW installer URL or path (.deb)'
required: false
type: string
default: 'https://catalog.jan.ai/linux/Jan_0.6.8_amd64.deb'
old_macos_installer:
description: 'macOS OLD installer URL or path (.dmg)'
required: false
type: string
default: 'https://catalog.jan.ai/macos/Jan_0.6.7_universal.dmg'
new_macos_installer:
description: 'macOS NEW installer URL or path (.dmg)'
required: false
type: string
default: 'https://catalog.jan.ai/macos/Jan_0.6.8_universal.dmg'
migration_test_case:
description: 'Specific migration test case key (leave empty to run all)'
required: false
type: string
default: ''
max_turns:
description: 'Maximum turns per test phase'
required: false
type: number
default: 65
jobs:
migration-windows:
runs-on: windows-11-nvidia-gpu
timeout-minutes: 60
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Python 3.13
uses: actions/setup-python@v4
with:
python-version: '3.13'
- name: Clean existing Jan installations
shell: powershell
run: |
.\autoqa\scripts\windows_cleanup.ps1 -IsNightly "${{ inputs.is_nightly }}"
- name: Download OLD and NEW installers
shell: powershell
run: |
# Download OLD installer using existing script
.\autoqa\scripts\windows_download.ps1 `
-WorkflowInputUrl "${{ inputs.old_windows_installer }}" `
-WorkflowInputIsNightly "${{ inputs.is_nightly }}" `
-RepoVariableUrl "" `
-RepoVariableIsNightly "" `
-DefaultUrl "" `
-DefaultIsNightly ""
$oldSrc = Join-Path $env:TEMP 'jan-installer.exe'
$oldOut = Join-Path $env:TEMP 'jan-old.exe'
Copy-Item -Path $oldSrc -Destination $oldOut -Force
# Download NEW installer using existing script
.\autoqa\scripts\windows_download.ps1 `
-WorkflowInputUrl "${{ inputs.new_windows_installer }}" `
-WorkflowInputIsNightly "${{ inputs.is_nightly }}" `
-RepoVariableUrl "" `
-RepoVariableIsNightly "" `
-DefaultUrl "" `
-DefaultIsNightly ""
$newSrc = Join-Path $env:TEMP 'jan-installer.exe'
$newOut = Join-Path $env:TEMP 'jan-new.exe'
Copy-Item -Path $newSrc -Destination $newOut -Force
Write-Host "OLD installer: $oldOut"
Write-Host "NEW installer: $newOut"
echo "OLD_VERSION=$oldOut" | Out-File -FilePath $env:GITHUB_ENV -Append
echo "NEW_VERSION=$newOut" | Out-File -FilePath $env:GITHUB_ENV -Append
- name: Install Python dependencies
working-directory: autoqa
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: Run migration tests (Windows)
working-directory: autoqa
shell: powershell
env:
RP_TOKEN: ${{ secrets.RP_TOKEN }}
ENABLE_REPORTPORTAL: 'true'
RP_ENDPOINT: 'https://reportportal.menlo.ai'
RP_PROJECT: 'default_personal'
run: |
$case = "${{ inputs.migration_test_case }}"
$procName = if ("${{ inputs.is_nightly }}" -eq "true") { "Jan-nightly.exe" } else { "Jan.exe" }
if ($case -and $case.Trim() -ne "") {
python main.py --enable-migration-test --old-version "$env:OLD_VERSION" --new-version "$env:NEW_VERSION" --max-turns ${{ inputs.max_turns }} --jan-process-name "$procName" --migration-test-case $case
} else {
python main.py --enable-migration-test --old-version "$env:OLD_VERSION" --new-version "$env:NEW_VERSION" --max-turns ${{ inputs.max_turns }} --jan-process-name "$procName"
}
- name: Upload screen recordings
if: always()
uses: actions/upload-artifact@v4
continue-on-error: true
with:
name: migration-recordings-${{ github.run_number }}-windows
path: autoqa/recordings/
- name: Upload trajectories
if: always()
uses: actions/upload-artifact@v4
continue-on-error: true
with:
name: migration-trajectories-${{ github.run_number }}-windows
path: autoqa/trajectories/
- name: Cleanup after tests
if: always()
shell: powershell
run: |
.\autoqa\scripts\windows_post_cleanup.ps1 -IsNightly "${{ inputs.is_nightly }}"
migration-ubuntu:
if: inputs.old_ubuntu_installer != '' && inputs.new_ubuntu_installer != ''
runs-on: ubuntu-22-04-nvidia-gpu
timeout-minutes: 60
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Python 3.13
uses: actions/setup-python@v4
with:
python-version: '3.13'
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y \
x11-utils \
python3-tk \
python3-dev \
wmctrl \
xdotool \
libnss3-dev \
libgconf-2-4 \
libxss1 \
libasound2 \
libxtst6 \
libgtk-3-0 \
libgbm-dev \
libxshmfence1 \
libxrandr2 \
libpangocairo-1.0-0 \
libatk1.0-0 \
libcairo-gobject2 \
libgdk-pixbuf2.0-0 \
gnome-screenshot \
libwebkit2gtk-4.1-0 \
xvfb
- name: Setup script permissions
run: |
chmod +x autoqa/scripts/setup_permissions.sh || true
./autoqa/scripts/setup_permissions.sh || true
- name: Clean existing Jan installations
run: |
./autoqa/scripts/ubuntu_cleanup.sh
- name: Download OLD and NEW installers
run: |
set -e
# Download OLD installer using existing script
./autoqa/scripts/ubuntu_download.sh \
"${{ inputs.old_ubuntu_installer }}" \
"${{ inputs.is_nightly }}" \
"" \
"" \
"" \
""
cp /tmp/jan-installer.deb /tmp/jan-old.deb
# Download NEW installer using existing script
./autoqa/scripts/ubuntu_download.sh \
"${{ inputs.new_ubuntu_installer }}" \
"${{ inputs.is_nightly }}" \
"" \
"" \
"" \
""
cp /tmp/jan-installer.deb /tmp/jan-new.deb
echo "OLD_VERSION=/tmp/jan-old.deb" >> $GITHUB_ENV
echo "NEW_VERSION=/tmp/jan-new.deb" >> $GITHUB_ENV
- name: Install Python dependencies
working-directory: autoqa
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: Run migration tests (Ubuntu)
working-directory: autoqa
run: |
case="${{ inputs.migration_test_case }}"
procName=$([ "${{ inputs.is_nightly }}" = "true" ] && echo "Jan-nightly" || echo "Jan")
if [ -n "${case}" ]; then
xvfb-run -a python main.py --enable-migration-test --old-version "${OLD_VERSION}" --new-version "${NEW_VERSION}" --max-turns ${{ inputs.max_turns }} --jan-process-name "${procName}" --migration-test-case "${case}"
else
xvfb-run -a python main.py --enable-migration-test --old-version "${OLD_VERSION}" --new-version "${NEW_VERSION}" --max-turns ${{ inputs.max_turns }} --jan-process-name "${procName}"
fi
- name: Upload screen recordings
if: always()
uses: actions/upload-artifact@v4
continue-on-error: true
with:
name: migration-recordings-${{ github.run_number }}-ubuntu
path: autoqa/recordings/
- name: Upload trajectories
if: always()
uses: actions/upload-artifact@v4
continue-on-error: true
with:
name: migration-trajectories-${{ github.run_number }}-ubuntu
path: autoqa/trajectories/
- name: Cleanup after tests
if: always()
run: |
./autoqa/scripts/ubuntu_post_cleanup.sh "${{ inputs.is_nightly }}"
migration-macos:
if: inputs.old_macos_installer != '' && inputs.new_macos_installer != ''
runs-on: macos-selfhosted-15-arm64-cua
timeout-minutes: 60
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Python 3.13
uses: actions/setup-python@v4
with:
python-version: '3.13'
- name: Setup script permissions
run: |
chmod +x autoqa/scripts/setup_permissions.sh || true
./autoqa/scripts/setup_permissions.sh || true
- name: Clean existing Jan installations
run: |
./autoqa/scripts/macos_cleanup.sh
- name: Download OLD and NEW installers
run: |
set -e
# Download OLD installer using existing script
./autoqa/scripts/macos_download.sh \
"${{ inputs.old_macos_installer }}" \
"${{ inputs.is_nightly }}" \
"" \
"" \
"" \
""
cp /tmp/jan-installer.dmg /tmp/jan-old.dmg
# Download NEW installer using existing script
./autoqa/scripts/macos_download.sh \
"${{ inputs.new_macos_installer }}" \
"${{ inputs.is_nightly }}" \
"" \
"" \
"" \
""
cp /tmp/jan-installer.dmg /tmp/jan-new.dmg
echo "OLD_VERSION=/tmp/jan-old.dmg" >> $GITHUB_ENV
echo "NEW_VERSION=/tmp/jan-new.dmg" >> $GITHUB_ENV
- name: Install system dependencies
run: |
echo "Installing system dependencies for macOS..."
# Check if Homebrew is available
if command -v brew >/dev/null 2>&1; then
echo "Homebrew is available"
# Install python-tk if not available
python3 -c "import tkinter" 2>/dev/null || {
echo "Installing python-tk via Homebrew..."
brew install python-tk || true
}
else
echo "Homebrew not available, checking if tkinter works..."
python3 -c "import tkinter" || {
echo "[WARNING] tkinter not available and Homebrew not found"
echo "This may cause issues with mouse control"
}
fi
echo "System dependencies check completed"
- name: Install Python dependencies
working-directory: autoqa
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: Run migration tests (macOS)
working-directory: autoqa
run: |
case="${{ inputs.migration_test_case }}"
procName=$([ "${{ inputs.is_nightly }}" = "true" ] && echo "Jan-nightly" || echo "Jan")
if [ -n "${case}" ]; then
python main.py --enable-migration-test --old-version "${OLD_VERSION}" --new-version "${NEW_VERSION}" --max-turns ${{ inputs.max_turns }} --jan-process-name "${procName}" --migration-test-case "${case}"
else
python main.py --enable-migration-test --old-version "${OLD_VERSION}" --new-version "${NEW_VERSION}" --max-turns ${{ inputs.max_turns }} --jan-process-name "${procName}"
fi
- name: Upload screen recordings
if: always()
uses: actions/upload-artifact@v4
continue-on-error: true
with:
name: migration-recordings-${{ github.run_number }}-macos
path: autoqa/recordings/
- name: Upload trajectories
if: always()
uses: actions/upload-artifact@v4
continue-on-error: true
with:
name: migration-trajectories-${{ github.run_number }}-macos
path: autoqa/trajectories/
- name: Cleanup after tests
if: always()
run: |
./autoqa/scripts/macos_post_cleanup.sh
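The migration workflow above only runs on `workflow_dispatch`, so a run has to be dispatched with its inputs. As a minimal sketch, assuming the repository is reachable through an authenticated GitHub CLI and that `dev` is a valid branch (both assumptions), a dispatch could look like this; the installer URLs are simply the defaults declared in the inputs:

```bash
# Hypothetical manual dispatch of the migration workflow (branch and URLs are placeholders)
gh workflow run autoqa-migration.yml \
  --ref dev \
  -f is_nightly=false \
  -f old_windows_installer='https://catalog.jan.ai/windows/Jan_0.6.7_x64-setup.exe' \
  -f new_windows_installer='https://catalog.jan.ai/windows/Jan_0.6.8_x64-setup.exe' \
  -f migration_test_case=assistants \
  -f max_turns=65
```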

311
.github/workflows/autoqa-reliability.yml vendored Normal file

@@ -0,0 +1,311 @@
name: AutoQA Reliability (Manual)
on:
workflow_dispatch:
inputs:
source_type:
description: 'App source type (url)'
required: true
type: choice
options: [url]
default: url
jan_app_windows_source:
description: 'Windows installer URL path (used when source_type=url or to select artifact)'
required: true
type: string
default: 'https://catalog.jan.ai/windows/Jan_0.6.8_x64-setup.exe'
jan_app_ubuntu_source:
description: 'Ubuntu .deb URL path'
required: true
type: string
default: 'https://catalog.jan.ai/linux/Jan_0.6.8_amd64.deb'
jan_app_macos_source:
description: 'macOS .dmg URL path'
required: true
type: string
default: 'https://catalog.jan.ai/macos/Jan_0.6.8_universal.dmg'
is_nightly:
description: 'Is the app a nightly build?'
required: true
type: boolean
default: false
reliability_phase:
description: 'Reliability phase'
required: true
type: choice
options: [development, deployment]
default: development
reliability_runs:
description: 'Custom runs (0 uses phase default)'
required: false
type: number
default: 0
reliability_test_path:
description: 'Test file path (relative to autoqa working directory)'
required: true
type: string
default: 'tests/base/settings/app-data.txt'
jobs:
reliability-windows:
runs-on: windows-11-nvidia-gpu
timeout-minutes: 60
env:
DEFAULT_JAN_APP_URL: 'https://catalog.jan.ai/windows/Jan_0.6.8_x64-setup.exe'
DEFAULT_IS_NIGHTLY: 'false'
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Python 3.13
uses: actions/setup-python@v4
with:
python-version: '3.13'
- name: Clean existing Jan installations
shell: powershell
run: |
.\autoqa\scripts\windows_cleanup.ps1 -IsNightly "${{ inputs.is_nightly }}"
- name: Download/Prepare Jan app
shell: powershell
run: |
.\autoqa\scripts\windows_download.ps1 `
-WorkflowInputUrl "${{ inputs.jan_app_windows_source }}" `
-WorkflowInputIsNightly "${{ inputs.is_nightly }}" `
-RepoVariableUrl "${{ vars.JAN_APP_URL }}" `
-RepoVariableIsNightly "${{ vars.IS_NIGHTLY }}" `
-DefaultUrl "$env:DEFAULT_JAN_APP_URL" `
-DefaultIsNightly "$env:DEFAULT_IS_NIGHTLY"
- name: Install Jan app
shell: powershell
run: |
.\autoqa\scripts\windows_install.ps1 -IsNightly "$env:IS_NIGHTLY"
- name: Install Python dependencies
working-directory: autoqa
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: Run reliability tests
working-directory: autoqa
shell: powershell
run: |
$runs = "${{ inputs.reliability_runs }}"
$runsArg = ""
if ([int]$runs -gt 0) { $runsArg = "--reliability-runs=$runs" }
python main.py --enable-reliability-test --reliability-phase "${{ inputs.reliability_phase }}" --reliability-test-path "${{ inputs.reliability_test_path }}" $runsArg
- name: Upload screen recordings
if: always()
uses: actions/upload-artifact@v4
continue-on-error: true
with:
name: reliability-recordings-${{ github.run_number }}-${{ runner.os }}
path: autoqa/recordings/
- name: Upload trajectories
if: always()
uses: actions/upload-artifact@v4
continue-on-error: true
with:
name: reliability-trajectories-${{ github.run_number }}-${{ runner.os }}
path: autoqa/trajectories/
- name: Cleanup after tests
if: always()
shell: powershell
run: |
.\autoqa\scripts\windows_post_cleanup.ps1 -IsNightly "${{ inputs.is_nightly }}"
reliability-ubuntu:
runs-on: ubuntu-22-04-nvidia-gpu
timeout-minutes: 60
env:
DEFAULT_JAN_APP_URL: 'https://catalog.jan.ai/linux/Jan_0.6.8_amd64.deb'
DEFAULT_IS_NIGHTLY: 'false'
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Python 3.13
uses: actions/setup-python@v4
with:
python-version: '3.13'
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y \
x11-utils \
python3-tk \
python3-dev \
wmctrl \
xdotool \
libnss3-dev \
libgconf-2-4 \
libxss1 \
libasound2 \
libxtst6 \
libgtk-3-0 \
libgbm-dev \
libxshmfence1 \
libxrandr2 \
libpangocairo-1.0-0 \
libatk1.0-0 \
libcairo-gobject2 \
libgdk-pixbuf2.0-0 \
gnome-screenshot \
xvfb
- name: Setup script permissions
run: |
chmod +x autoqa/scripts/setup_permissions.sh
./autoqa/scripts/setup_permissions.sh
- name: Clean existing Jan installations
run: |
./autoqa/scripts/ubuntu_cleanup.sh
- name: Download/Prepare Jan app
run: |
./autoqa/scripts/ubuntu_download.sh \
"${{ inputs.jan_app_ubuntu_source }}" \
"${{ inputs.is_nightly }}" \
"${{ vars.JAN_APP_URL_LINUX }}" \
"${{ vars.IS_NIGHTLY }}" \
"$DEFAULT_JAN_APP_URL" \
"$DEFAULT_IS_NIGHTLY"
# Set env for install and runtime
echo "JAN_APP_PATH=/tmp/jan-installer.deb" >> $GITHUB_ENV
if [ "${{ inputs.is_nightly }}" = "true" ]; then
echo "JAN_PROCESS_NAME=Jan-nightly" >> $GITHUB_ENV
echo "RUNTIME_JAN_BIN=/usr/bin/Jan-nightly" >> $GITHUB_ENV
else
echo "JAN_PROCESS_NAME=Jan" >> $GITHUB_ENV
echo "RUNTIME_JAN_BIN=/usr/bin/Jan" >> $GITHUB_ENV
fi
- name: Install Jan app
run: |
./autoqa/scripts/ubuntu_install.sh "$IS_NIGHTLY"
- name: Install Python dependencies
working-directory: autoqa
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: Run reliability tests
working-directory: autoqa
run: |
runs="${{ inputs.reliability_runs }}"
runsArg=""
if [ "${runs}" -gt 0 ]; then runsArg="--reliability-runs=${runs}"; fi
processName="${JAN_PROCESS_NAME}"
janBin="${RUNTIME_JAN_BIN}"
xvfb-run -a python main.py --enable-reliability-test --reliability-phase "${{ inputs.reliability_phase }}" --reliability-test-path "${{ inputs.reliability_test_path }}" ${runsArg} --jan-process-name "${processName}" --jan-app-path "${janBin}"
- name: Upload screen recordings
if: always()
uses: actions/upload-artifact@v4
continue-on-error: true
with:
name: reliability-recordings-${{ github.run_number }}-ubuntu
path: autoqa/recordings/
- name: Upload trajectories
if: always()
uses: actions/upload-artifact@v4
continue-on-error: true
with:
name: reliability-trajectories-${{ github.run_number }}-ubuntu
path: autoqa/trajectories/
- name: Cleanup after tests
if: always()
run: |
./autoqa/scripts/ubuntu_post_cleanup.sh "$IS_NIGHTLY"
reliability-macos:
runs-on: macos-selfhosted-15-arm64-cua
timeout-minutes: 60
env:
DEFAULT_JAN_APP_URL: 'https://catalog.jan.ai/macos/Jan_0.6.8_universal.dmg'
DEFAULT_IS_NIGHTLY: 'false'
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Python 3.13
uses: actions/setup-python@v4
with:
python-version: '3.13'
- name: Setup script permissions
run: |
chmod +x autoqa/scripts/setup_permissions.sh
./autoqa/scripts/setup_permissions.sh
- name: Clean existing Jan installations
run: |
./autoqa/scripts/macos_cleanup.sh
- name: Download/Prepare Jan app
run: |
./autoqa/scripts/macos_download.sh \
"${{ inputs.jan_app_macos_source }}" \
"${{ inputs.is_nightly }}" \
"${{ vars.JAN_APP_URL }}" \
"${{ vars.IS_NIGHTLY }}" \
"$DEFAULT_JAN_APP_URL" \
"$DEFAULT_IS_NIGHTLY"
echo "JAN_APP_PATH=/tmp/jan-installer.dmg" >> $GITHUB_ENV
if [ "${{ inputs.is_nightly }}" = "true" ]; then
echo "PROCESS_NAME=Jan-nightly" >> $GITHUB_ENV
echo "RUNTIME_JAN_BIN=/Applications/Jan-nightly.app/Contents/MacOS/Jan-nightly" >> $GITHUB_ENV
else
echo "PROCESS_NAME=Jan" >> $GITHUB_ENV
echo "RUNTIME_JAN_BIN=/Applications/Jan.app/Contents/MacOS/Jan" >> $GITHUB_ENV
fi
- name: Install Jan app
run: |
./autoqa/scripts/macos_install.sh
- name: Install Python dependencies
working-directory: autoqa
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: Run reliability tests
working-directory: autoqa
run: |
runs="${{ inputs.reliability_runs }}"
runsArg=""
if [ "${runs}" -gt 0 ]; then runsArg="--reliability-runs ${runs}"; fi
python main.py --enable-reliability-test --reliability-phase "${{ inputs.reliability_phase }}" --reliability-test-path "${{ inputs.reliability_test_path }}" ${runsArg} --jan-process-name "${PROCESS_NAME}" --jan-app-path "${RUNTIME_JAN_BIN}"
- name: Upload screen recordings
if: always()
uses: actions/upload-artifact@v4
continue-on-error: true
with:
name: reliability-recordings-${{ github.run_number }}-macos
path: autoqa/recordings/
- name: Upload trajectories
if: always()
uses: actions/upload-artifact@v4
continue-on-error: true
with:
name: reliability-trajectories-${{ github.run_number }}-macos
path: autoqa/trajectories/
- name: Cleanup after tests
if: always()
run: |
./autoqa/scripts/macos_post_cleanup.sh
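Like the migration workflow, this reliability workflow is dispatch-only. A minimal sketch of triggering it, under the same assumptions about GitHub CLI access and branch name, and leaving the installer inputs at their declared defaults:

```bash
# Hypothetical manual dispatch of the reliability workflow (branch is a placeholder)
gh workflow run autoqa-reliability.yml \
  --ref dev \
  -f is_nightly=false \
  -f reliability_phase=deployment \
  -f reliability_test_path='tests/base/settings/app-data.txt'
```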

441
autoqa/COMMAND_REFERENCE.md Normal file

@@ -0,0 +1,441 @@
# AutoQA Command Reference
📚 Complete reference for all AutoQA command line arguments and options.
## Command Line Arguments
### Basic Syntax
```bash
python main.py [OPTIONS]
```
### Argument Groups
Arguments are organized into logical groups for easier understanding and usage.
## Computer Server Configuration
| Argument | Environment Variable | Default | Description |
|----------|---------------------|---------|-------------|
| `--skip-server-start` | `SKIP_SERVER_START` | `false` | Skip automatic computer server startup |
**Examples:**
```bash
# Auto-start computer server (default)
python main.py
# Use external computer server
python main.py --skip-server-start
# Using environment variable
SKIP_SERVER_START=true python main.py
```
## ReportPortal Configuration
| Argument | Environment Variable | Default | Description |
|----------|---------------------|---------|-------------|
| `--enable-reportportal` | `ENABLE_REPORTPORTAL` | `false` | Enable ReportPortal integration |
| `--rp-endpoint` | `RP_ENDPOINT` | `https://reportportal.menlo.ai` | ReportPortal endpoint URL |
| `--rp-project` | `RP_PROJECT` | `default_personal` | ReportPortal project name |
| `--rp-token` | `RP_TOKEN` | - | ReportPortal API token (required when RP enabled) |
| `--launch-name` | `LAUNCH_NAME` | - | Custom launch name for ReportPortal |
**Examples:**
```bash
# Basic ReportPortal integration
python main.py --enable-reportportal --rp-token "YOUR_TOKEN"
# Full ReportPortal configuration
python main.py \
--enable-reportportal \
--rp-endpoint "https://reportportal.example.com" \
--rp-project "my_project" \
--rp-token "YOUR_TOKEN" \
--launch-name "Custom Test Run"
# Using environment variables
ENABLE_REPORTPORTAL=true RP_TOKEN=secret python main.py
```
## Jan Application Configuration
| Argument | Environment Variable | Default | Description |
|----------|---------------------|---------|-------------|
| `--jan-app-path` | `JAN_APP_PATH` | auto-detected | Path to Jan application executable |
| `--jan-process-name` | `JAN_PROCESS_NAME` | platform-specific | Jan process name for monitoring |
**Platform-specific defaults:**
- **Windows**: `Jan.exe`
- **macOS**: `Jan`
- **Linux**: `Jan-nightly`
**Examples:**
```bash
# Custom Jan app path
python main.py --jan-app-path "C:/Custom/Path/Jan.exe"
# Custom process name
python main.py --jan-process-name "Jan-nightly.exe"
# Using environment variable
JAN_APP_PATH="D:/Apps/Jan/Jan.exe" python main.py
```
## Model Configuration
| Argument | Environment Variable | Default | Description |
|----------|---------------------|---------|-------------|
| `--model-loop` | `MODEL_LOOP` | `uitars` | Agent loop type |
| `--model-provider` | `MODEL_PROVIDER` | `oaicompat` | Model provider |
| `--model-name` | `MODEL_NAME` | `ByteDance-Seed/UI-TARS-1.5-7B` | AI model name |
| `--model-base-url` | `MODEL_BASE_URL` | `http://10.200.108.58:1234/v1` | Model API endpoint |
**Examples:**
```bash
# OpenAI GPT-4
python main.py \
--model-provider "openai" \
--model-name "gpt-4" \
--model-base-url "https://api.openai.com/v1"
# Anthropic Claude
python main.py \
--model-provider "anthropic" \
--model-name "claude-3-sonnet-20240229" \
--model-base-url "https://api.anthropic.com"
# Custom local model
python main.py \
--model-name "my-custom-model" \
--model-base-url "http://localhost:8000/v1"
# Using environment variables
MODEL_NAME=gpt-4 MODEL_BASE_URL=https://api.openai.com/v1 python main.py
```
## Test Execution Configuration
| Argument | Environment Variable | Default | Description |
|----------|---------------------|---------|-------------|
| `--max-turns` | `MAX_TURNS` | `30` | Maximum number of turns per test |
| `--tests-dir` | `TESTS_DIR` | `tests` | Directory containing test files |
| `--delay-between-tests` | `DELAY_BETWEEN_TESTS` | `3` | Delay between tests (seconds) |
**Examples:**
```bash
# Increase turn limit
python main.py --max-turns 50
# Custom test directory
python main.py --tests-dir "my_tests"
# Longer delay between tests
python main.py --delay-between-tests 10
# Using environment variables
MAX_TURNS=50 DELAY_BETWEEN_TESTS=5 python main.py
```
## Migration Testing Arguments
| Argument | Environment Variable | Default | Description |
|----------|---------------------|---------|-------------|
| `--enable-migration-test` | `ENABLE_MIGRATION_TEST` | `false` | Enable migration testing mode |
| `--migration-test-case` | `MIGRATION_TEST_CASE` | - | Specific migration test case to run |
| `--migration-batch-mode` | `MIGRATION_BATCH_MODE` | `false` | Use batch mode for migration tests |
| `--old-version` | `OLD_VERSION` | - | Path to old version installer |
| `--new-version` | `NEW_VERSION` | - | Path to new version installer |
## Reliability Testing Arguments
| Argument | Environment Variable | Default | Description |
|----------|---------------------|---------|-------------|
| `--enable-reliability-test` | `ENABLE_RELIABILITY_TEST` | `false` | Enable reliability testing mode |
| `--reliability-phase` | `RELIABILITY_PHASE` | `development` | Testing phase: development (5 runs) or deployment (20 runs) |
| `--reliability-runs` | `RELIABILITY_RUNS` | `0` | Custom number of runs (overrides phase setting) |
| `--reliability-test-path` | `RELIABILITY_TEST_PATH` | - | Specific test file path for reliability testing |
**Examples:**
```bash
# Basic migration test
python main.py \
--enable-migration-test \
--migration-test-case "assistants" \
--old-version "C:\path\to\old\installer.exe" \
--new-version "C:\path\to\new\installer.exe"
# Batch mode migration test
python main.py \
--enable-migration-test \
--migration-test-case "assistants-complete" \
--migration-batch-mode \
--old-version "C:\path\to\old\installer.exe" \
--new-version "C:\path\to\new\installer.exe"
# Using environment variables
ENABLE_MIGRATION_TEST=true \
MIGRATION_TEST_CASE=assistants \
OLD_VERSION="C:\path\to\old.exe" \
NEW_VERSION="C:\path\to\new.exe" \
python main.py
```
## Complete Command Examples
### Basic Testing
```bash
# Run all tests with defaults
python main.py
# Run specific test category
python main.py --tests-dir "tests/base"
# Custom configuration
python main.py \
--max-turns 50 \
--model-name "gpt-4" \
--model-base-url "https://api.openai.com/v1" \
--tests-dir "tests/base"
```
### Migration Testing
```bash
# Simple migration test
python main.py \
--enable-migration-test \
--migration-test-case "assistants" \
--old-version "Jan_0.6.6.exe" \
--new-version "Jan_0.6.7.exe" \
--max-turns 65
# Complete migration test with ReportPortal
python main.py \
--enable-migration-test \
--migration-test-case "assistants-complete" \
--migration-batch-mode \
--old-version "Jan_0.6.6.exe" \
--new-version "Jan_0.6.7.exe" \
--max-turns 75 \
--enable-reportportal \
--rp-token "YOUR_TOKEN" \
--rp-project "jan_migration_tests"
# Reliability testing - deployment phase with ReportPortal
python main.py \
--enable-reliability-test \
--reliability-phase deployment \
--reliability-test-path "tests/base/default-jan-assistant.txt" \
--max-turns 50 \
--enable-reportportal \
--rp-token "YOUR_TOKEN" \
--rp-project "jan_reliability_tests"
```
### Reliability Testing
```bash
# Development phase reliability test (5 runs)
python main.py \
--enable-reliability-test \
--reliability-phase development \
--max-turns 40
# Deployment phase reliability test (20 runs)
python main.py \
--enable-reliability-test \
--reliability-phase deployment \
--max-turns 40
# Custom number of runs
python main.py \
--enable-reliability-test \
--reliability-runs 10 \
--max-turns 40
# Test specific file with reliability testing
python main.py \
--enable-reliability-test \
--reliability-phase development \
--reliability-test-path "tests/base/default-jan-assistant.txt" \
--max-turns 40
# Reliability testing with ReportPortal
python main.py \
--enable-reliability-test \
--reliability-phase deployment \
--enable-reportportal \
--rp-token "YOUR_TOKEN" \
--max-turns 40
```
### Advanced Configuration
```bash
# Full custom configuration
python main.py \
--skip-server-start \
--enable-reportportal \
--rp-endpoint "https://custom.rp.com" \
--rp-project "jan_tests" \
--rp-token "YOUR_TOKEN" \
--jan-app-path "C:/Custom/Jan/Jan.exe" \
--jan-process-name "Jan-custom.exe" \
--model-provider "openai" \
--model-name "gpt-4-turbo" \
--model-base-url "https://api.openai.com/v1" \
--max-turns 100 \
--tests-dir "custom_tests" \
--delay-between-tests 5
```
## Environment Variables Summary
### Computer Server
- `SKIP_SERVER_START`: Skip auto computer server startup
### ReportPortal
- `ENABLE_REPORTPORTAL`: Enable ReportPortal integration
- `RP_ENDPOINT`: ReportPortal endpoint URL
- `RP_PROJECT`: ReportPortal project name
- `RP_TOKEN`: ReportPortal API token
- `LAUNCH_NAME`: Custom launch name
### Jan Application
- `JAN_APP_PATH`: Path to Jan executable
- `JAN_PROCESS_NAME`: Jan process name
### Model Configuration
- `MODEL_LOOP`: Agent loop type
- `MODEL_PROVIDER`: Model provider
- `MODEL_NAME`: AI model name
- `MODEL_BASE_URL`: Model API endpoint
### Test Execution
- `MAX_TURNS`: Maximum turns per test
- `TESTS_DIR`: Test files directory
- `DELAY_BETWEEN_TESTS`: Delay between tests
### Migration Testing
- `ENABLE_MIGRATION_TEST`: Enable migration mode
- `MIGRATION_TEST_CASE`: Migration test case
- `MIGRATION_BATCH_MODE`: Use batch mode
- `OLD_VERSION`: Old installer path
- `NEW_VERSION`: New installer path
### Reliability Testing
- `ENABLE_RELIABILITY_TEST`: Enable reliability testing mode
- `RELIABILITY_PHASE`: Testing phase (development/deployment)
- `RELIABILITY_RUNS`: Custom number of runs
- `RELIABILITY_TEST_PATH`: Specific test file path
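As a quick illustration of driving a run from the variables above, here is a minimal sketch of an environment-based reliability configuration (the values are placeholders; the enable flag is still passed on the command line, matching the examples elsewhere in this guide):

```bash
# Configure a reliability run through environment variables (placeholder values)
export ENABLE_RELIABILITY_TEST=true
export RELIABILITY_PHASE=deployment
export RELIABILITY_TEST_PATH="tests/base/default-jan-assistant.txt"
export MAX_TURNS=40

python main.py --enable-reliability-test
```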
## Help and Information
### Get Help
```bash
# Show all available options
python main.py --help
# Show help for specific section
python main.py --help | grep -A 10 "Migration"
```
### Version Information
```bash
# Check Python version
python --version
# Check AutoQA installation
python -c "import autoqa; print(autoqa.__version__)"
```
### Debug Information
```bash
# Enable debug logging
export LOG_LEVEL=DEBUG
export PYTHONPATH=.
# Run with verbose output
python main.py --enable-migration-test ...
```
## Best Practices
### 1. Use Environment Variables
```bash
# Set common configuration
export MAX_TURNS=65
export MODEL_NAME="gpt-4"
export JAN_APP_PATH="C:\path\to\Jan.exe"
# Use in commands
python main.py --max-turns "$MAX_TURNS"
```
### 2. Combine Arguments Logically
```bash
# Group related arguments
python main.py \
--enable-migration-test \
--migration-test-case "assistants" \
--old-version "old.exe" \
--new-version "new.exe" \
--max-turns 65 \
--enable-reportportal \
--rp-token "token"
```
### 3. Use Absolute Paths
```bash
# Windows
--old-version "C:\Users\username\Downloads\Jan_0.6.6.exe"
# Linux/macOS
--old-version "/home/user/downloads/Jan_0.6.6.deb"
```
### 4. Test Incrementally
```bash
# Start simple
python main.py
# Add migration
python main.py --enable-migration-test ...
# Add ReportPortal
python main.py --enable-migration-test ... --enable-reportportal ...
```
## Troubleshooting Commands
### Check Dependencies
```bash
# Verify Python packages
pip list | grep -E "(autoqa|computer|agent)"
# Check imports
python -c "import computer, agent, autoqa; print('All imports successful')"
```
### Check Configuration
```bash
# Validate arguments
python main.py --help
# Test specific configuration
python main.py --jan-app-path "nonexistent" 2>&1 | grep "not found"
```
### Debug Mode
```bash
# Enable debug logging
export LOG_LEVEL=DEBUG
export PYTHONPATH=.
# Run with debug output
python main.py --enable-migration-test ...
```
For more detailed information, see [MIGRATION_TESTING.md](MIGRATION_TESTING.md), [QUICK_START.md](QUICK_START.md), and [README.md](README.md).

370
autoqa/MIGRATION_TESTING.md Normal file

@@ -0,0 +1,370 @@
# AutoQA Migration Testing Guide
🚀 Comprehensive guide for running migration tests with AutoQA to verify data persistence across Jan application upgrades.
## Table of Contents
1. [Overview](#overview)
2. [Prerequisites](#prerequisites)
3. [Basic Workflow (Base Test Cases)](#basic-workflow-base-test-cases)
4. [Migration Testing](#migration-testing)
5. [Migration Test Cases](#migration-test-cases)
6. [Running Migration Tests](#running-migration-tests)
7. [Advanced Configuration](#advanced-configuration)
8. [Troubleshooting](#troubleshooting)
9. [Examples](#examples)
## Overview
AutoQA provides comprehensive testing capabilities for the Jan application, including:
- **Base Test Cases**: Standard functionality testing (assistants, models, extensions, etc.)
- **Migration Testing**: Verify data persistence and functionality across application upgrades
- **Batch Mode**: Run multiple test phases efficiently
- **Screen Recording**: Capture test execution for debugging
- **ReportPortal Integration**: Upload test results and artifacts
## Prerequisites
Before running migration tests, ensure you have:
1. **Python Environment**: Python 3.8+ with required packages
2. **Jan Installers**: Both old and new version installers
3. **Test Environment**: Clean system or virtual machine
4. **Dependencies**: All AutoQA requirements installed
```bash
# Install dependencies
pip install -r requirements.txt
```
## Basic Workflow (Base Test Cases)
### Running Standard Tests
Base test cases verify core Jan functionality without version upgrades:
```bash
# Run all base tests
python main.py
# Run specific test directory
python main.py --tests-dir "tests/base"
# Run with custom configuration
python main.py \
--max-turns 50
```
### Available Base Test Cases
| Test Case | File | Description |
|-----------|------|-------------|
| Default Assistant | `tests/base/default-jan-assistant.txt` | Verify Jan default assistant exists |
| Extensions | `tests/base/extensions.txt` | Check available extensions |
| Hardware Info | `tests/base/hardware-info.txt` | Verify hardware information display |
| Model Providers | `tests/base/providers-available.txt` | Check model provider availability |
| User Chat | `tests/base/user-start-chatting.txt` | Test basic chat functionality |
| MCP Server | `tests/base/enable-mcp-server.txt` | Test experimental features |
## Migration Testing
Migration testing verifies that user data and configurations persist correctly when upgrading Jan from one version to another.
### Migration Test Flow
```
1. Install OLD version → Run SETUP tests
2. Install NEW version → Run VERIFICATION tests
3. Compare results and verify persistence
```
### Migration Test Approaches
#### Individual Mode
- Runs one test case at a time
- More granular debugging
- Better for development and troubleshooting
#### Batch Mode
- Runs all setup tests first, then upgrades, then all verification tests
- More realistic user experience
- Faster execution for multiple test cases
## Migration Test Cases
### Available Migration Test Cases
| Test Case Key | Name | Description | Setup Tests | Verification Tests |
|---------------|------|-------------|-------------|-------------------|
| `models` | Model Downloads Migration | Tests downloaded models persist after upgrade | `models/setup-download-models.txt` | `models/verify-model-persistence.txt` |
| `assistants` | Custom Assistants Migration | Tests custom assistants persist after upgrade | `assistants/setup-create-assistants.txt` | `assistants/verify-create-assistant-persistence.txt` |
| `assistants-complete` | Complete Assistants Migration | Tests both creation and chat functionality | Multiple setup tests | Multiple verification tests |
### Test Case Details
#### Models Migration Test
- **Setup**: Downloads models, configures settings, tests functionality
- **Verification**: Confirms models persist, settings maintained, functionality intact
#### Assistants Migration Test
- **Setup**: Creates custom assistants with specific configurations
- **Verification**: Confirms assistants persist with correct metadata and settings
#### Assistants Complete Migration Test
- **Setup**: Creates assistants AND tests chat functionality
- **Verification**: Confirms both creation and chat data persist correctly
## Running Migration Tests
### Basic Migration Test Command
```bash
python main.py \
--enable-migration-test \
--migration-test-case "assistants" \
--old-version "path/to/old/installer.exe" \
--new-version "path/to/new/installer.exe" \
--max-turns 65
```
### Batch Mode Migration Test
```bash
python main.py \
--enable-migration-test \
--migration-test-case "assistants-complete" \
--migration-batch-mode \
--old-version "path/to/old/installer.exe" \
--new-version "path/to/new/installer.exe" \
--max-turns 75
```
### Command Line Arguments
| Argument | Description | Required | Example |
|----------|-------------|----------|---------|
| `--enable-migration-test` | Enable migration testing mode | Yes | `--enable-migration-test` |
| `--migration-test-case` | Specific test case to run | Yes | `--migration-test-case "assistants"` |
| `--migration-batch-mode` | Use batch mode for multiple tests | No | `--migration-batch-mode` |
| `--old-version` | Path to old version installer | Yes | `--old-version "C:\path\to\old.exe"` |
| `--new-version` | Path to new version installer | Yes | `--new-version "C:\path\to\new.exe"` |
| `--max-turns` | Maximum turns per test phase | No | `--max-turns 75` |
### Environment Variables
You can also use environment variables for cleaner commands:
```bash
# Set environment variables
export OLD_VERSION="C:\path\to\old\installer.exe"
export NEW_VERSION="C:\path\to\new\installer.exe"
export MIGRATION_TEST_CASE="assistants"
export MAX_TURNS=65
# Run with environment variables
python main.py \
--enable-migration-test \
--migration-test-case "$MIGRATION_TEST_CASE" \
--old-version "$OLD_VERSION" \
--new-version "$NEW_VERSION" \
--max-turns "$MAX_TURNS"
```
## Advanced Configuration
### Custom Model Configuration
```bash
python main.py \
--enable-migration-test \
--migration-test-case "assistants" \
--old-version "path/to/old.exe" \
--new-version "path/to/new.exe" \
--model-name "gpt-4" \
--model-provider "openai" \
--model-base-url "https://api.openai.com/v1" \
--max-turns 80
```
### ReportPortal Integration
```bash
python main.py \
--enable-migration-test \
--migration-test-case "assistants" \
--old-version "path/to/old.exe" \
--new-version "path/to/new.exe" \
--enable-reportportal \
--rp-token "YOUR_TOKEN" \
--rp-project "jan_migration_tests" \
--max-turns 65
```
### Custom Test Directory
```bash
python main.py \
--enable-migration-test \
--migration-test-case "assistants" \
--old-version "path/to/old.exe" \
--new-version "path/to/new.exe" \
--tests-dir "custom_tests" \
--max-turns 65
```
## Examples
### Example 1: Basic Assistants Migration Test
```bash
# Test custom assistants persistence
python main.py \
--enable-migration-test \
--migration-test-case "assistants" \
--old-version "C:\Users\ziczac computer\Downloads\Jan_0.6.6_x64-setup.exe" \
--new-version "C:\Users\ziczac computer\Downloads\Jan_0.6.7_x64-setup.exe" \
--max-turns 65
```
**What this does:**
1. Installs Jan 0.6.6
2. Creates custom assistants (Python Tutor, Creative Writer)
3. Upgrades to Jan 0.6.7
4. Verifies assistants persist with correct settings
### Example 2: Complete Assistants Migration (Batch Mode)
```bash
# Test both creation and chat functionality
python main.py \
--enable-migration-test \
--migration-test-case "assistants-complete" \
--migration-batch-mode \
--old-version "C:\Users\ziczac computer\Downloads\Jan_0.6.6_x64-setup.exe" \
--new-version "C:\Users\ziczac computer\Downloads\Jan_0.6.7_x64-setup.exe" \
--max-turns 75
```
**What this does:**
1. Installs Jan 0.6.6
2. Creates custom assistants
3. Tests chat functionality with assistants
4. Upgrades to Jan 0.6.7
5. Verifies both creation and chat data persist
### Example 3: Models Migration Test
```bash
# Test model downloads and settings persistence
python main.py \
--enable-migration-test \
--migration-test-case "models" \
--old-version "C:\Users\ziczac computer\Downloads\Jan_0.6.6_x64-setup.exe" \
--new-version "C:\Users\ziczac computer\Downloads\Jan_0.6.7_x64-setup.exe" \
--max-turns 60
```
**What this does:**
1. Installs Jan 0.6.6
2. Downloads models (jan-nano-gguf, gemma-2-2b-instruct-gguf)
3. Configures model settings
4. Upgrades to Jan 0.6.7
5. Verifies models persist and settings maintained
## Troubleshooting
### Common Issues
#### 1. Installer Path Issues
```bash
# Use absolute paths with proper escaping
--old-version "C:\Users\ziczac computer\Downloads\Jan_0.6.6_x64-setup.exe"
--new-version "C:\Users\ziczac computer\Downloads\Jan_0.6.7_x64-setup.exe"
```
#### 2. Turn Limit Too Low
```bash
# Increase max turns for complex tests
--max-turns 75 # Instead of default 30
```
#### 3. Test Case Not Found
```bash
# Verify test case key exists
--migration-test-case "assistants" # Valid: models, assistants, assistants-complete
```
#### 4. Permission Issues
```bash
# Run as administrator on Windows
# Use sudo on Linux/macOS for system-wide installations
```
### Debug Mode
Enable detailed logging for troubleshooting:
```bash
# Set logging level
export PYTHONPATH=.
export LOG_LEVEL=DEBUG
# Run with verbose output
python main.py \
--enable-migration-test \
--migration-test-case "assistants" \
--old-version "path/to/old.exe" \
--new-version "path/to/new.exe" \
--max-turns 65
```
### Test Results
Migration tests generate detailed results:
- **Setup Phase Results**: Success/failure for each setup test
- **Upgrade Results**: Installation success status
- **Verification Phase Results**: Success/failure for each verification test
- **Overall Success**: Combined result from all phases
### Output Files
Tests generate several output files:
- **Trajectories**: `trajectories/` - Agent interaction logs
- **Recordings**: `recordings/` - Screen recordings (MP4)
- **Logs**: Console output with detailed execution information
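As a small sketch for inspecting these artifacts after a run (the directory names come from the list above, but the file naming inside them is not documented here, so the listing patterns are assumptions):

```bash
# List the newest recordings and trajectory files from the autoqa working directory
ls -lt recordings/ | head -n 5
ls -lt trajectories/ | head -n 5

# Skim the end of the most recent trajectory file, whatever its name or extension
latest=$(ls -t trajectories/ | head -n 1)
tail -n 40 "trajectories/${latest}"
```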
## Best Practices
### 1. Test Environment
- Use clean virtual machines or fresh system installations
- Ensure sufficient disk space for installers and test data
- Close other applications during testing
### 2. Test Data
- Use realistic test data (assistant names, descriptions, instructions)
- Test with multiple models and configurations
- Verify edge cases and error conditions
### 3. Execution
- Start with individual mode for debugging
- Use batch mode for production testing
- Monitor system resources during execution
### 4. Validation
- Verify test results manually when possible
- Check generated artifacts (trajectories, recordings)
- Compare expected vs. actual behavior
## Next Steps
1. **Start Simple**: Begin with basic migration tests
2. **Add Complexity**: Gradually test more complex scenarios
3. **Automate**: Integrate into CI/CD pipelines
4. **Extend**: Add new test cases for specific features
5. **Optimize**: Refine test parameters and configurations
For more information, see the main [README.md](README.md) and explore the test files in the `tests/` directory.

272
autoqa/QUICK_START.md Normal file

@@ -0,0 +1,272 @@
# AutoQA Quick Start Guide
🚀 Get started with AutoQA in minutes - from basic testing to migration verification.
## Quick Start
### 1. Install Dependencies
```bash
# Install required packages
pip install -r requirements.txt
```
### 2. Basic Testing (No Migration)
```bash
# Run all base tests
python main.py
# Run specific test category
python main.py --tests-dir "tests/base"
# Custom configuration
python main.py \
--max-turns 50
```
### 3. Migration Testing
```bash
# Basic migration test
python main.py \
--enable-migration-test \
--migration-test-case "assistants" \
--old-version "C:\path\to\old\installer.exe" \
--new-version "C:\path\to\new\installer.exe" \
--max-turns 65
# Batch mode migration test
python main.py \
--enable-migration-test \
--migration-test-case "assistants-complete" \
--migration-batch-mode \
--old-version "C:\path\to\old\installer.exe" \
--new-version "C:\path\to\new\installer.exe" \
--max-turns 75
```
### 4. Reliability Testing
```bash
# Development phase (5 runs)
python main.py \
--enable-reliability-test \
--reliability-phase development \
--max-turns 40
# Deployment phase (20 runs)
python main.py \
--enable-reliability-test \
--reliability-phase deployment \
--max-turns 40
# Custom number of runs
python main.py \
--enable-reliability-test \
--reliability-runs 10 \
--max-turns 40
```
## Test Types
### Base Test Cases
- **Default Assistant**: Verify Jan default assistant exists
- **Extensions**: Check available extensions
- **Hardware Info**: Verify hardware information display
- **Model Providers**: Check model provider availability
- **User Chat**: Test basic chat functionality
- **MCP Server**: Test experimental features
### Migration Test Cases
- **`models`**: Test downloaded models persist after upgrade
- **`assistants`**: Test custom assistants persist after upgrade
- **`assistants-complete`**: Test both creation and chat functionality
### Reliability Testing
- **Development Phase**: Run test 5 times to verify basic stability (≥80% success rate)
- **Deployment Phase**: Run test 20 times to verify production readiness (≥90% success rate)
- **Custom Runs**: Specify custom number of runs for specific testing needs
## Common Commands
### Basic Workflow
```bash
# Run all tests
python main.py
# Run with ReportPortal
python main.py --enable-reportportal --rp-token "YOUR_TOKEN"
# Custom test directory
python main.py --tests-dir "my_tests"
# Skip computer server auto-start
python main.py --skip-server-start
```
### Migration Workflow
```bash
# Test assistants migration
python main.py \
--enable-migration-test \
--migration-test-case "assistants" \
--old-version "path/to/old.exe" \
--new-version "path/to/new.exe"
# Test models migration
python main.py \
--enable-migration-test \
--migration-test-case "models" \
--old-version "path/to/old.exe" \
--new-version "path/to/new.exe"
# Test complete assistants migration (batch mode)
python main.py \
--enable-migration-test \
--migration-test-case "assistants-complete" \
--migration-batch-mode \
--old-version "path/to/old.exe" \
--new-version "path/to/new.exe"
# Test reliability - development phase
python main.py \
--enable-reliability-test \
--reliability-phase development \
--max-turns 40
# Test reliability - deployment phase
python main.py \
--enable-reliability-test \
--reliability-phase deployment \
--max-turns 40
```
## Configuration Options
### Essential Arguments
| Argument | Description | Default |
|----------|-------------|---------|
| `--max-turns` | Maximum turns per test | 30 |
| `--tests-dir` | Test files directory | `tests` |
| `--jan-app-path` | Jan executable path | auto-detected |
| `--model-name` | AI model name | UI-TARS-1.5-7B |
### Migration Arguments
| Argument | Description | Required |
|----------|-------------|----------|
| `--enable-migration-test` | Enable migration mode | Yes |
| `--migration-test-case` | Test case to run | Yes |
| `--migration-batch-mode` | Use batch mode | No |
| `--old-version` | Old installer path | Yes |
| `--new-version` | New installer path | Yes |
### ReportPortal Arguments
| Argument | Description | Required |
|----------|-------------|----------|
| `--enable-reportportal` | Enable RP integration | No |
| `--rp-token` | ReportPortal token | Yes (if RP enabled) |
| `--rp-endpoint` | RP endpoint URL | No |
| `--rp-project` | RP project name | No |
### Reliability Testing Arguments
| Argument | Description | Required |
|----------|-------------|----------|
| `--enable-reliability-test` | Enable reliability mode | Yes |
| `--reliability-phase` | Testing phase (development/deployment) | No |
| `--reliability-runs` | Custom number of runs | No |
| `--reliability-test-path` | Specific test file path | No |
## Environment Variables
```bash
# Set common variables
export MAX_TURNS=65
export MODEL_NAME="gpt-4"
export MODEL_BASE_URL="https://api.openai.com/v1"
export JAN_APP_PATH="C:\path\to\Jan.exe"
# Use in commands
python main.py --max-turns "$MAX_TURNS"
```
## Examples
### Example 1: Basic Testing
```bash
# Test core functionality
python main.py \
--max-turns 40 \
--tests-dir "tests/base"
```
### Example 2: Simple Migration
```bash
# Test assistants persistence
python main.py \
--enable-migration-test \
--migration-test-case "assistants" \
--old-version "Jan_0.6.6.exe" \
--new-version "Jan_0.6.7.exe" \
--max-turns 65
```
### Example 3: Advanced Migration
```bash
# Test complete functionality with ReportPortal
python main.py \
--enable-migration-test \
--migration-test-case "assistants-complete" \
--migration-batch-mode \
--old-version "Jan_0.6.6.exe" \
--new-version "Jan_0.6.7.exe" \
--max-turns 75 \
--enable-reportportal \
--rp-token "YOUR_TOKEN" \
--rp-project "jan_migration_tests"
```
### Example 4: Reliability Testing
```bash
# Test reliability with deployment phase
python main.py \
--enable-reliability-test \
--reliability-phase deployment \
--reliability-test-path "tests/base/default-jan-assistant.txt" \
--max-turns 50 \
--enable-reportportal \
--rp-token "YOUR_TOKEN" \
--rp-project "jan_reliability_tests"
```
## Troubleshooting
### Common Issues
1. **Path Issues**: Use absolute paths with proper escaping
2. **Turn Limits**: Increase `--max-turns` for complex tests
3. **Permissions**: Run as administrator on Windows
4. **Dependencies**: Ensure all packages are installed
### Debug Mode
```bash
# Enable verbose logging
export LOG_LEVEL=DEBUG
export PYTHONPATH=.
# Run with debug output
python main.py --enable-migration-test ...
```
## Output Files
- **Trajectories**: `trajectories/` - Agent interaction logs
- **Recordings**: `recordings/` - Screen recordings (MP4)
- **Console**: Detailed execution logs
## Next Steps
1. **Start Simple**: Run basic tests first
2. **Add Migration**: Test data persistence
3. **Customize**: Adjust parameters for your needs
4. **Integrate**: Add to CI/CD pipelines
For detailed documentation, see [MIGRATION_TESTING.md](MIGRATION_TESTING.md) and [README.md](README.md).


@@ -12,6 +12,7 @@
- 🎯 **Flexible Configuration**: Command-line arguments and environment variables
- 🌐 **Cross-platform**: Windows, macOS, and Linux support
- 📁 **Test Discovery**: Automatically scans test files from directory
- 🧪 **Reliability Testing**: Run tests multiple times to verify stability (development: 5 runs, deployment: 20 runs)
## Prerequisites
@@ -74,6 +75,25 @@ python main.py \
--rp-token "YOUR_API_TOKEN"
```
### Reliability Testing
```bash
# Development phase (5 runs) - verify basic stability
python main.py --enable-reliability-test --reliability-phase development
# Deployment phase (20 runs) - verify production readiness
python main.py --enable-reliability-test --reliability-phase deployment
# Custom number of runs
python main.py --enable-reliability-test --reliability-runs 10
# Test specific file with reliability testing
python main.py \
--enable-reliability-test \
--reliability-phase development \
--reliability-test-path "tests/base/default-jan-assistant.txt"
```
## Configuration
### Command Line Arguments


@@ -0,0 +1,296 @@
# AutoQA Reliability Testing Guide
🚀 Comprehensive guide for running reliability tests with AutoQA to verify test case stability and reliability.
## Overview
Reliability testing is designed to verify that your test cases are stable and reliable by running them multiple times. This helps identify flaky tests and ensures consistent behavior before deploying to production.
## Two Testing Phases
### 1. Development Phase
- **Purpose**: Verify basic stability during development
- **Runs**: 5 times
- **Success Rate Requirement**: ≥80%
- **Use Case**: During development to catch obvious stability issues
### 2. Deployment Phase
- **Purpose**: Verify production readiness
- **Runs**: 20 times
- **Success Rate Requirement**: ≥90%
- **Use Case**: Before deploying to production to ensure reliability
## Command Line Usage
### Basic Reliability Testing
```bash
# Development phase (5 runs)
python main.py --enable-reliability-test --reliability-phase development
# Deployment phase (20 runs)
python main.py --enable-reliability-test --reliability-phase deployment
```
### Custom Configuration
```bash
# Custom number of runs
python main.py --enable-reliability-test --reliability-runs 10
# Specific test file
python main.py --enable-reliability-test --reliability-test-path "tests/base/default-jan-assistant.txt"
# Custom max turns
python main.py --enable-reliability-test --reliability-phase development --max-turns 50
```
### With ReportPortal Integration
```bash
# Development phase with ReportPortal
python main.py \
--enable-reliability-test \
--reliability-phase development \
--enable-reportportal \
--rp-token "YOUR_TOKEN" \
--rp-project "jan_reliability_tests"
# Deployment phase with ReportPortal
python main.py \
--enable-reliability-test \
--reliability-phase deployment \
--enable-reportportal \
--rp-token "YOUR_TOKEN" \
--rp-project "jan_reliability_tests"
```
## Environment Variables
```bash
# Enable reliability testing
export ENABLE_RELIABILITY_TEST=true
# Set phase
export RELIABILITY_PHASE=deployment
# Custom runs (overrides phase)
export RELIABILITY_RUNS=15
# Specific test path
export RELIABILITY_TEST_PATH="tests/base/my-test.txt"
# Run with environment variables
python main.py --enable-reliability-test
```
## Command Line Arguments
| Argument | Environment Variable | Default | Description |
|----------|---------------------|---------|-------------|
| `--enable-reliability-test` | `ENABLE_RELIABILITY_TEST` | `false` | Enable reliability testing mode |
| `--reliability-phase` | `RELIABILITY_PHASE` | `development` | Testing phase: development or deployment |
| `--reliability-runs` | `RELIABILITY_RUNS` | `0` | Custom number of runs (overrides phase) |
| `--reliability-test-path` | `RELIABILITY_TEST_PATH` | - | Specific test file path |
## Test Execution Flow
### Single Test Reliability Testing
1. **Load Test File**: Read the specified test file
2. **Run Multiple Times**: Execute the test the specified number of times
3. **Track Results**: Monitor success/failure for each run
4. **Calculate Success Rate**: Determine overall reliability
5. **Generate Report**: Provide detailed results and statistics
### Multiple Tests Reliability Testing
1. **Scan Test Files**: Find all test files in the specified directory
2. **Run Reliability Tests**: Execute reliability testing on each test file
3. **Aggregate Results**: Combine results from all tests
4. **Overall Assessment**: Determine if the entire test suite is reliable
## Output and Results
### Success Rate Calculation
```
Success Rate = (Successful Runs / Total Runs) × 100
```
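For example, 4 successful runs out of 5 gives an 80.0% success rate, exactly the development-phase minimum, while 18 out of 20 gives 90.0%, exactly the deployment-phase minimum.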
### Development Phase Requirements
- **Target**: 5 runs
- **Minimum Success Rate**: 80%
- **Result**: PASS if ≥80%, FAIL if <80%
### Deployment Phase Requirements
- **Target**: 20 runs
- **Minimum Success Rate**: 90%
- **Result**: PASS if ≥90%, FAIL if <90%
### Sample Output
```
==========================================
RELIABILITY TEST SUMMARY
==========================================
Test: tests/base/default-jan-assistant.txt
Phase: DEVELOPMENT
Completed runs: 5/5
Successful runs: 4
Failed runs: 1
Success rate: 80.0%
Total duration: 125.3 seconds
Average duration per run: 25.1 seconds
Overall result: ✅ PASSED
Development phase requirement: ≥80% success rate
```
## Use Cases
### 1. New Test Development
```bash
# Test a new test case for basic stability
python main.py \
--enable-reliability-test \
--reliability-phase development \
--reliability-test-path "tests/base/my-new-test.txt"
```
### 2. Pre-Production Validation
```bash
# Verify test suite is production-ready
python main.py \
--enable-reliability-test \
--reliability-phase deployment \
--tests-dir "tests/base"
```
### 3. Flaky Test Investigation
```bash
# Run a potentially flaky test multiple times
python main.py \
--enable-reliability-test \
--reliability-runs 25 \
--reliability-test-path "tests/base/flaky-test.txt"
```
### 4. CI/CD Integration
```bash
# Automated reliability testing in CI/CD
ENABLE_RELIABILITY_TEST=true \
RELIABILITY_PHASE=deployment \
python main.py --max-turns 40
```
## Best Practices
### 1. Start with Development Phase
- Begin with 5 runs to catch obvious issues
- Use during active development
- Quick feedback on test stability
### 2. Use Deployment Phase for Production
- Run 20 times before production deployment
- Ensures high reliability standards
- Catches intermittent failures
### 3. Custom Runs for Specific Needs
- Use custom run counts for special testing scenarios
- Investigate flaky tests with higher run counts
- Balance between thoroughness and execution time
### 4. Monitor Execution Time
- Reliability testing takes longer than single runs
- Plan accordingly for CI/CD pipelines
- Consider parallel execution for multiple test files
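Note that the runner executes test files sequentially; parallelism is only practical when each worker drives its own isolated desktop session (for example, a separate VM), since the tests interact with the UI and record the screen. A purely hypothetical sketch of bounded concurrency under that assumption (all names are placeholders, not functions from this repository):
```python
# Hypothetical sketch only: assumes run_reliability_for_file executes one file's
# reliability loop inside its own isolated environment and returns a result dict.
import asyncio

async def run_reliability_for_file(path: str) -> dict:
    ...  # placeholder for a per-file reliability run in an isolated session
    return {"test_path": path, "overall_success": True}

async def run_files_in_parallel(paths: list[str], limit: int = 2) -> list[dict]:
    semaphore = asyncio.Semaphore(limit)  # cap concurrency to the available environments

    async def bounded(path: str) -> dict:
        async with semaphore:
            return await run_reliability_for_file(path)

    return await asyncio.gather(*(bounded(p) for p in paths))
```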
## Troubleshooting
### Common Issues
#### 1. Test File Not Found
```bash
# Ensure test path is correct
python main.py \
--enable-reliability-test \
--reliability-test-path "tests/base/existing-test.txt"
```
#### 2. Low Success Rate
- Check test environment stability
- Verify test dependencies
- Review test logic for race conditions
#### 3. Long Execution Time
- Reduce max turns if appropriate
- Use development phase for quick feedback
- Consider running fewer test files
### Debug Mode
```bash
# Enable debug logging
export LOG_LEVEL=DEBUG
export PYTHONPATH=.
# Run with verbose output
python main.py --enable-reliability-test --reliability-phase development
```
## Integration with Existing Workflows
### Migration Testing
```bash
# Run reliability tests on migration test cases
python main.py \
--enable-reliability-test \
--reliability-phase deployment \
--tests-dir "tests/migration"
```
### Base Testing
```bash
# Run reliability tests on base test cases
python main.py \
--enable-reliability-test \
--reliability-phase development \
--tests-dir "tests/base"
```
### Custom Test Directories
```bash
# Run reliability tests on custom test directory
python main.py \
--enable-reliability-test \
--reliability-phase deployment \
--tests-dir "my_custom_tests"
```
## Performance Considerations
### Execution Time
- **Development Phase**: ~5x single test execution time
- **Deployment Phase**: ~20x single test execution time
- **Multiple Tests**: Multiply by number of test files
### Resource Usage
- Screen recordings for each run
- Trajectory data for each run
- ReportPortal uploads (if enabled)
### Optimization Tips
- Use development phase for quick feedback
- Run deployment phase during off-peak hours
- Consider parallel execution for multiple test files
- Clean up old recordings and trajectories regularly
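For the last point, a simple retention sweep is usually enough. One possible helper (illustrative only; `recordings/` and `trajectories/` match the directories the runner writes to, but adjust the paths and retention window to your setup):
```python
# Illustrative cleanup helper: deletes artifact files older than `days` under the
# runner's output directories.
import time
from pathlib import Path

def prune_old_artifacts(days: int = 7, roots: tuple[str, ...] = ("recordings", "trajectories")) -> int:
    cutoff = time.time() - days * 86_400  # retention window in seconds
    removed = 0
    for root in roots:
        for path in Path(root).rglob("*"):
            if path.is_file() and path.stat().st_mtime < cutoff:
                path.unlink()  # remove the stale recording/trajectory file
                removed += 1
    return removed

if __name__ == "__main__":
    print(f"Removed {prune_old_artifacts(days=7)} old artifact files")
```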
## Next Steps
1. **Start Simple**: Begin with development phase on single test files
2. **Scale Up**: Move to deployment phase for critical tests
3. **Automate**: Integrate into CI/CD pipelines
4. **Monitor**: Track reliability trends over time
5. **Improve**: Use results to identify and fix flaky tests
For more information, see the main [README.md](README.md), [QUICK_START.md](QUICK_START.md), and explore the test files in the `tests/` directory.

autoqa/batch_migration_runner.py Normal file
@ -0,0 +1,413 @@
import asyncio
import logging
import os
import time
from datetime import datetime
from pathlib import Path
import threading
from utils import force_close_jan, is_jan_running, start_jan_app
from migration_utils import install_jan_version, prepare_migration_environment
from test_runner import run_single_test_with_timeout
from agent import ComputerAgent, LLM
from screen_recorder import ScreenRecorder
from reportportal_handler import upload_test_results_to_rp
from utils import get_latest_trajectory_folder
from reportportal_handler import extract_test_result_from_trajectory
logger = logging.getLogger(__name__)
async def run_single_test_with_timeout_no_restart(computer, test_data, rp_client, launch_id, max_turns=30,
jan_app_path=None, jan_process_name="Jan.exe", agent_config=None,
enable_reportportal=False):
"""
Run a single test case WITHOUT restarting the Jan app - assumes app is already running
Returns dict with test result: {"success": bool, "status": str, "message": str}
"""
path = test_data['path']
prompt = test_data['prompt']
# Detect if using nightly version based on process name
is_nightly = "nightly" in jan_process_name.lower() if jan_process_name else False
# Default agent config if not provided
if agent_config is None:
agent_config = {
"loop": "uitars",
"model_provider": "oaicompat",
"model_name": "ByteDance-Seed/UI-TARS-1.5-7B",
"model_base_url": "http://10.200.108.58:1234/v1"
}
# Create trajectory_dir from path (remove .txt extension)
trajectory_name = str(Path(path).with_suffix(''))
trajectory_base_dir = os.path.abspath(f"trajectories/{trajectory_name.replace(os.sep, '/')}")
# Ensure trajectories directory exists
os.makedirs(os.path.dirname(trajectory_base_dir), exist_ok=True)
# Create recordings directory
recordings_dir = "recordings"
os.makedirs(recordings_dir, exist_ok=True)
# Create video filename
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
safe_test_name = trajectory_name.replace('/', '_').replace('\\', '_')
video_filename = f"{safe_test_name}_{current_time}.mp4"
video_path = os.path.abspath(os.path.join(recordings_dir, video_filename))
# Initialize screen recorder
recorder = ScreenRecorder(video_path, fps=10)
try:
# Check if Jan app is running (don't restart)
from utils import is_jan_running
if not is_jan_running(jan_process_name):
logger.warning(f"Jan application ({jan_process_name}) is not running, but continuing anyway")
else:
# Ensure window is maximized for this test
from utils import maximize_jan_window
if maximize_jan_window():
logger.info("Jan application window maximized for test")
else:
logger.warning("Could not maximize Jan application window for test")
# Start screen recording
recorder.start_recording()
# Create agent for this test using config
agent = ComputerAgent(
computer=computer,
loop=agent_config["loop"],
model=LLM(
provider=agent_config["model_provider"],
name=agent_config["model_name"],
provider_base_url=agent_config["model_base_url"]
),
trajectory_dir=trajectory_base_dir
)
# Run the test with prompt
logger.info(f"Running test case: {path}")
async for result in agent.run(prompt):
logger.info(f"Test result for {path}: {result}")
print(result)
# Stop screen recording
recorder.stop_recording()
# Extract test result
trajectory_folder = get_latest_trajectory_folder(path)
test_result = extract_test_result_from_trajectory(trajectory_folder)
# Upload to ReportPortal if enabled
if enable_reportportal and rp_client and launch_id:
upload_test_results_to_rp(rp_client, launch_id, test_result, trajectory_folder)
return test_result
except Exception as e:
logger.error(f"Test failed with exception: {e}")
recorder.stop_recording()
return {"success": False, "status": "error", "message": str(e)}
finally:
# Stop screen recording
recorder.stop_recording()
# Don't close Jan app - let it keep running for the next test
logger.info(f"Completed test: {path} (Jan app kept running)")
async def run_batch_migration_test(computer, old_version_path, new_version_path,
rp_client=None, launch_id=None, max_turns=30, agent_config=None,
enable_reportportal=False, test_cases=None):
"""
Run migration test with batch approach: all setups first, then upgrade, then all verifies
This approach is more realistic (like a real user) but less granular for debugging
"""
from individual_migration_runner import MIGRATION_TEST_CASES
if test_cases is None:
test_cases = list(MIGRATION_TEST_CASES.keys())
logger.info("=" * 100)
logger.info("RUNNING BATCH MIGRATION TESTS")
logger.info("=" * 100)
logger.info(f"Test cases: {', '.join(test_cases)}")
logger.info("Approach: Setup All → Upgrade → Verify All")
logger.info("")
batch_result = {
"overall_success": False,
"setup_phase_success": False,
"upgrade_success": False,
"verification_phase_success": False,
"setup_results": {},
"verify_results": {},
"error_message": None
}
try:
# Prepare migration environment
env_setup = prepare_migration_environment()
logger.info(f"Migration environment prepared: {env_setup}")
# PHASE 1: Install old version and run ALL setup tests
logger.info("=" * 80)
logger.info("PHASE 1: BATCH SETUP ON OLD VERSION")
logger.info("=" * 80)
install_jan_version(old_version_path, "old")
time.sleep(15) # Extra wait time for stability
# Force close any existing Jan processes before starting fresh
logger.info("Force closing any existing Jan processes...")
force_close_jan("Jan.exe")
force_close_jan("Jan-nightly.exe")
time.sleep(5) # Wait for processes to fully close
# Start Jan app once for the entire setup phase
logger.info("Starting Jan application for setup phase...")
start_jan_app()
time.sleep(10) # Wait for app to be ready
# Ensure window is maximized for testing
from utils import maximize_jan_window
if maximize_jan_window():
logger.info("Jan application window maximized for setup phase")
else:
logger.warning("Could not maximize Jan application window for setup phase")
setup_failures = 0
for i, test_case_key in enumerate(test_cases, 1):
test_case = MIGRATION_TEST_CASES[test_case_key]
logger.info(f"[{i}/{len(test_cases)}] Running setup: {test_case['name']}")
# Support both single setup_test and multiple setup_tests
setup_files = []
if 'setup_tests' in test_case:
setup_files = test_case['setup_tests']
elif 'setup_test' in test_case:
setup_files = [test_case['setup_test']]
else:
logger.error(f"No setup tests defined for {test_case_key}")
batch_result["setup_results"][test_case_key] = False
setup_failures += 1
continue
# Run all setup files for this test case
test_case_setup_success = True
for j, setup_file in enumerate(setup_files, 1):
logger.info(f" [{j}/{len(setup_files)}] Running setup file: {setup_file}")
# Load and run setup test
setup_test_path = f"tests/migration/{setup_file}"
if not os.path.exists(setup_test_path):
logger.error(f"Setup test file not found: {setup_test_path}")
test_case_setup_success = False
continue
with open(setup_test_path, "r", encoding="utf-8") as f:
setup_content = f.read()
setup_test_data = {
"path": setup_file,
"prompt": setup_content
}
# Run test without restarting Jan app (assumes Jan is already running)
setup_result = await run_single_test_with_timeout_no_restart(
computer=computer,
test_data=setup_test_data,
rp_client=rp_client,
launch_id=launch_id,
max_turns=max_turns,
jan_app_path=None,
jan_process_name="Jan.exe",
agent_config=agent_config,
enable_reportportal=enable_reportportal
)
success = setup_result.get("success", False) if setup_result else False
if success:
logger.info(f" ✅ Setup file {setup_file}: SUCCESS")
else:
logger.error(f" ❌ Setup file {setup_file}: FAILED")
test_case_setup_success = False
# Small delay between setup files
time.sleep(3)
# Record overall result for this test case
batch_result["setup_results"][test_case_key] = test_case_setup_success
if test_case_setup_success:
logger.info(f"✅ Setup {test_case_key}: SUCCESS (all {len(setup_files)} files completed)")
else:
logger.error(f"❌ Setup {test_case_key}: FAILED (one or more files failed)")
setup_failures += 1
# Small delay between setups
time.sleep(3)
batch_result["setup_phase_success"] = setup_failures == 0
logger.info(f"Setup phase complete: {len(test_cases) - setup_failures}/{len(test_cases)} successful")
if setup_failures > 0:
logger.warning(f"{setup_failures} setup tests failed - continuing with upgrade anyway")
# PHASE 2: Upgrade to new version
logger.info("=" * 80)
logger.info("PHASE 2: UPGRADING TO NEW VERSION")
logger.info("=" * 80)
force_close_jan("Jan.exe")
force_close_jan("Jan-nightly.exe")
time.sleep(5)
install_jan_version(new_version_path, "new")
batch_result["upgrade_success"] = True
time.sleep(15) # Extra wait time after upgrade
# Force close any existing Jan processes before starting fresh
logger.info("Force closing any existing Jan processes...")
force_close_jan("Jan.exe")
force_close_jan("Jan-nightly.exe")
time.sleep(5) # Wait for processes to fully close
# Start Jan app once for the entire verification phase
logger.info("Starting Jan application for verification phase...")
start_jan_app()
time.sleep(10) # Wait for app to be ready
# Ensure window is maximized for testing
from utils import maximize_jan_window
if maximize_jan_window():
logger.info("Jan application window maximized for verification phase")
else:
logger.warning("Could not maximize Jan application window for verification phase")
# PHASE 3: Run ALL verification tests on new version
logger.info("=" * 80)
logger.info("PHASE 3: BATCH VERIFICATION ON NEW VERSION")
logger.info("=" * 80)
verify_failures = 0
for i, test_case_key in enumerate(test_cases, 1):
test_case = MIGRATION_TEST_CASES[test_case_key]
logger.info(f"[{i}/{len(test_cases)}] Running verification: {test_case['name']}")
# Skip verification if setup failed (optional - you could still try)
if not batch_result["setup_results"].get(test_case_key, False):
logger.warning(f"Skipping verification for {test_case_key} - setup failed")
batch_result["verify_results"][test_case_key] = False
verify_failures += 1
continue
# Support both single verify_test and multiple verify_tests
verify_files = []
if 'verify_tests' in test_case:
verify_files = test_case['verify_tests']
elif 'verify_test' in test_case:
verify_files = [test_case['verify_test']]
else:
logger.error(f"No verify tests defined for {test_case_key}")
batch_result["verify_results"][test_case_key] = False
verify_failures += 1
continue
# Run all verify files for this test case
test_case_verify_success = True
for j, verify_file in enumerate(verify_files, 1):
logger.info(f" [{j}/{len(verify_files)}] Running verify file: {verify_file}")
# Load and run verification test
verify_test_path = f"tests/migration/{verify_file}"
if not os.path.exists(verify_test_path):
logger.error(f"Verification test file not found: {verify_test_path}")
test_case_verify_success = False
continue
with open(verify_test_path, "r", encoding="utf-8") as f:
verify_content = f.read()
verify_test_data = {
"path": verify_file,
"prompt": verify_content
}
# Run test without restarting Jan app (assumes Jan is already running)
verify_result = await run_single_test_with_timeout_no_restart(
computer=computer,
test_data=verify_test_data,
rp_client=rp_client,
launch_id=launch_id,
max_turns=max_turns,
jan_app_path=None,
jan_process_name="Jan.exe",
agent_config=agent_config,
enable_reportportal=enable_reportportal
)
success = verify_result.get("success", False) if verify_result else False
if success:
logger.info(f" ✅ Verify file {verify_file}: SUCCESS")
else:
logger.error(f" ❌ Verify file {verify_file}: FAILED")
test_case_verify_success = False
# Small delay between verify files
time.sleep(3)
# Record overall result for this test case
batch_result["verify_results"][test_case_key] = test_case_verify_success
if test_case_verify_success:
logger.info(f"✅ Verify {test_case_key}: SUCCESS (all {len(verify_files)} files completed)")
else:
logger.error(f"❌ Verify {test_case_key}: FAILED (one or more files failed)")
verify_failures += 1
# Small delay between verifications
time.sleep(3)
batch_result["verification_phase_success"] = verify_failures == 0
logger.info(f"Verification phase complete: {len(test_cases) - verify_failures}/{len(test_cases)} successful")
# Overall success calculation
batch_result["overall_success"] = (
batch_result["setup_phase_success"] and
batch_result["upgrade_success"] and
batch_result["verification_phase_success"]
)
# Final summary
logger.info("=" * 100)
logger.info("BATCH MIGRATION TEST SUMMARY")
logger.info("=" * 100)
logger.info(f"Overall Success: {batch_result['overall_success']}")
logger.info(f"Setup Phase: {batch_result['setup_phase_success']} ({len(test_cases) - setup_failures}/{len(test_cases)})")
logger.info(f"Upgrade Phase: {batch_result['upgrade_success']}")
logger.info(f"Verification Phase: {batch_result['verification_phase_success']} ({len(test_cases) - verify_failures}/{len(test_cases)})")
logger.info("")
logger.info("Detailed Results:")
for test_case_key in test_cases:
            setup_status = "✅" if batch_result["setup_results"].get(test_case_key, False) else "❌"
            verify_status = "✅" if batch_result["verify_results"].get(test_case_key, False) else "❌"
logger.info(f" {test_case_key.ljust(20)}: Setup {setup_status} | Verify {verify_status}")
return batch_result
except Exception as e:
logger.error(f"Batch migration test failed with exception: {e}")
batch_result["error_message"] = str(e)
return batch_result
finally:
# Cleanup
force_close_jan("Jan.exe")
force_close_jan("Jan-nightly.exe")

autoqa/individual_migration_runner.py Normal file
@ -0,0 +1,498 @@
import asyncio
import logging
import os
import time
from datetime import datetime
from pathlib import Path
from utils import force_close_jan, is_jan_running
from migration_utils import install_jan_version, prepare_migration_environment
from test_runner import run_single_test_with_timeout
logger = logging.getLogger(__name__)
# Migration test case definitions - organized by QA checklist categories
MIGRATION_TEST_CASES = {
"models": {
"name": "Model Downloads Migration",
"setup_test": "models/setup-download-model.txt",
"verify_test": "models/verify-model-persistence.txt",
"description": "Tests that downloaded models persist after upgrade"
},
"appearance_dark-theme": {
"name": "Dark Theme Appearance Migration",
"setup_test": "appearance/setup-dark-theme-appearance.txt",
"verify_test": "appearance/verify-dark-theme-appearance-persistence.txt",
"description": "Tests that the Dark theme setting persists after upgrade"
},
"appearance_light-theme": {
"name": "Light Theme Appearance Migration",
"setup_test": "appearance/setup-light-theme-appearance.txt",
"verify_test": "appearance/verify-light-theme-appearance-persistence.txt",
"description": "Tests that the Light theme setting persists after upgrade"
},
"appearance_font-size": {
"name": "Font Size Appearance Migration",
"setup_test": "appearance/setup-font-size-appearance.txt",
"verify_test": "appearance/verify-font-size-appearance-persistence.txt",
"description": "Tests that the Font Size setting (Extra Large) persists after upgrade"
},
"assistants": {
"name": "Custom Assistants Migration",
"setup_test": "assistants/setup-create-assistants.txt",
"verify_test": "assistants/verify-create-assistant-persistence.txt",
"description": "Tests that custom assistants persist after upgrade"
},
# "assistants-complete": {
# "name": "Complete Assistants Migration (Create + Chat)",
# "setup_tests": [
# "assistants/setup-create-assistants.txt",
# "assistants/setup-chat-with-assistant.txt"
# ],
# "verify_tests": [
# "assistants/verify-create-assistant-persistence.txt",
# "assistants/verify-chat-with-assistant-persistence.txt"
# ],
# "description": "Tests that custom assistants creation and chat functionality persist after upgrade (batch mode only)"
# },
"modify-local-api-server": {
"name": "Modify Local API Server Settings Migration",
"setup_test": "settings/setup-local-api-server.txt",
"verify_test": "settings/verify-local-api-server-persistence.txt",
"description": "Tests that local API server settings (port and API prefix) persist after upgrade"
},
"modify-https-proxy": {
"name": "Modify HTTPS Proxy Settings Migration",
"setup_test": "settings/setup-https-proxy.txt",
"verify_test": "settings/verify-https-proxy-persistence.txt",
"description": "Tests that HTTPS proxy settings persist after upgrade"
},
"models_disable-model-providers": {
"name": "Model Providers Enabled/Disabled State Migration",
"setup_test": "models/setup-model-providers.txt",
"verify_test": "models/verify-model-providers-persistence.txt",
"description": "Ensures enabled/disabled state for providers persists after upgrade (Disabled: Llama.cpp, OpenAI, Anthropic; Enabled: Cohere, OpenRouter, Mistral, Groq, Gemini, Hugging Face)"
}
}
async def run_individual_migration_test(computer, test_case_key, old_version_path, new_version_path,
rp_client=None, launch_id=None, max_turns=30, agent_config=None,
enable_reportportal=False):
"""
Run a single migration test case
Args:
computer: Computer agent instance
test_case_key: Key identifying the test case (e.g., "models", "chat-threads")
old_version_path: Path to old version installer
new_version_path: Path to new version installer
rp_client: ReportPortal client (optional)
launch_id: ReportPortal launch ID (optional)
max_turns: Maximum turns per test phase
agent_config: Agent configuration
enable_reportportal: Whether to upload to ReportPortal
"""
if test_case_key not in MIGRATION_TEST_CASES:
raise ValueError(f"Unknown test case: {test_case_key}")
test_case = MIGRATION_TEST_CASES[test_case_key]
logger.info("=" * 80)
logger.info(f"RUNNING MIGRATION TEST: {test_case['name'].upper()}")
logger.info("=" * 80)
logger.info(f"Description: {test_case['description']}")
logger.info(f"Setup Test: tests/migration/{test_case['setup_test']}")
logger.info(f"Verify Test: tests/migration/{test_case['verify_test']}")
logger.info("")
logger.info("Test Flow:")
logger.info(" 1. Install OLD version → Run SETUP test")
logger.info(" 2. Install NEW version → Run VERIFY test")
logger.info(" 3. Cleanup and prepare for next test")
logger.info("")
migration_result = {
"test_case": test_case_key,
"test_name": test_case["name"],
"overall_success": False,
"old_version_setup": False,
"new_version_install": False,
"upgrade_verification": False,
"error_message": None
}
try:
# Prepare migration environment
env_setup = prepare_migration_environment()
logger.info(f"Migration environment prepared: {env_setup}")
# Phase 1: Install old version and run setup test
logger.info("PHASE 1: Installing old version and running setup test")
logger.info("-" * 60)
install_jan_version(old_version_path, "old")
time.sleep(10) # Wait for Jan to be ready
# Load and run setup test
setup_test_path = f"tests/migration/{test_case['setup_test']}"
if not os.path.exists(setup_test_path):
raise FileNotFoundError(f"Setup test file not found: {setup_test_path}")
with open(setup_test_path, "r", encoding="utf-8") as f:
setup_content = f.read()
setup_test_data = {
"path": test_case['setup_test'],
"prompt": setup_content
}
setup_result = await run_single_test_with_timeout(
computer=computer,
test_data=setup_test_data,
rp_client=rp_client,
launch_id=launch_id,
max_turns=max_turns,
jan_app_path=None, # Auto-detect
jan_process_name="Jan.exe",
agent_config=agent_config,
enable_reportportal=enable_reportportal
)
migration_result["old_version_setup"] = setup_result.get("success", False) if setup_result else False
logger.info(f"Setup phase result: {migration_result['old_version_setup']}")
if not migration_result["old_version_setup"]:
migration_result["error_message"] = f"Failed to setup {test_case['name']} on old version"
return migration_result
# Phase 2: Install new version (upgrade)
logger.info("PHASE 2: Installing new version (upgrade)")
logger.info("-" * 60)
# Force close Jan before installing new version
force_close_jan("Jan.exe")
force_close_jan("Jan-nightly.exe")
time.sleep(5)
# Install new version
install_jan_version(new_version_path, "new")
migration_result["new_version_install"] = True
time.sleep(10) # Wait for new version to be ready
# Phase 3: Run verification test on new version (includes data integrity check)
logger.info("PHASE 3: Running verification test on new version")
logger.info("-" * 60)
# Load and run verification test
verify_test_path = f"tests/migration/{test_case['verify_test']}"
if not os.path.exists(verify_test_path):
raise FileNotFoundError(f"Verification test file not found: {verify_test_path}")
with open(verify_test_path, "r", encoding="utf-8") as f:
verify_content = f.read()
verify_test_data = {
"path": test_case['verify_test'],
"prompt": verify_content
}
verify_result = await run_single_test_with_timeout(
computer=computer,
test_data=verify_test_data,
rp_client=rp_client,
launch_id=launch_id,
max_turns=max_turns,
jan_app_path=None, # Auto-detect
jan_process_name="Jan.exe",
agent_config=agent_config,
enable_reportportal=enable_reportportal
)
migration_result["upgrade_verification"] = verify_result.get("success", False) if verify_result else False
logger.info(f"Verification phase result: {migration_result['upgrade_verification']}")
# Overall success check
migration_result["overall_success"] = (
migration_result["old_version_setup"] and
migration_result["new_version_install"] and
migration_result["upgrade_verification"]
)
logger.info("=" * 80)
logger.info(f"MIGRATION TEST COMPLETED: {test_case['name'].upper()}")
logger.info("=" * 80)
logger.info(f"Overall Success: {migration_result['overall_success']}")
logger.info(f"Old Version Setup: {migration_result['old_version_setup']}")
logger.info(f"New Version Install: {migration_result['new_version_install']}")
logger.info(f"Upgrade Verification: {migration_result['upgrade_verification']}")
return migration_result
except Exception as e:
logger.error(f"Migration test {test_case['name']} failed with exception: {e}")
migration_result["error_message"] = str(e)
return migration_result
finally:
# Cleanup: Force close any remaining Jan processes
force_close_jan("Jan.exe")
force_close_jan("Jan-nightly.exe")
async def run_assistant_batch_migration_test(computer, old_version_path, new_version_path,
rp_client=None, launch_id=None, max_turns=30, agent_config=None,
enable_reportportal=False):
"""
Run both assistant test cases in batch mode:
- Setup both assistant tests on old version
- Upgrade to new version
- Verify both assistant tests on new version
"""
assistant_test_cases = ["assistants", "assistant-chat"]
logger.info("=" * 100)
logger.info("RUNNING ASSISTANT BATCH MIGRATION TESTS")
logger.info("=" * 100)
logger.info(f"Test cases: {', '.join(assistant_test_cases)}")
logger.info("Approach: Setup Both → Upgrade → Verify Both")
logger.info("")
batch_result = {
"overall_success": False,
"setup_phase_success": False,
"upgrade_success": False,
"verification_phase_success": False,
"setup_results": {},
"verify_results": {},
"error_message": None
}
try:
# Prepare migration environment
env_setup = prepare_migration_environment()
logger.info(f"Migration environment prepared: {env_setup}")
# PHASE 1: Install old version and run BOTH setup tests
logger.info("=" * 80)
logger.info("PHASE 1: BATCH SETUP ON OLD VERSION")
logger.info("=" * 80)
install_jan_version(old_version_path, "old")
time.sleep(15) # Extra wait time for stability
setup_failures = 0
for i, test_case_key in enumerate(assistant_test_cases, 1):
test_case = MIGRATION_TEST_CASES[test_case_key]
logger.info(f"[{i}/{len(assistant_test_cases)}] Running setup: {test_case['name']}")
# Load and run setup test
setup_test_path = f"tests/migration/{test_case['setup_test']}"
if not os.path.exists(setup_test_path):
logger.error(f"Setup test file not found: {setup_test_path}")
batch_result["setup_results"][test_case_key] = False
setup_failures += 1
continue
with open(setup_test_path, "r") as f:
setup_content = f.read()
setup_test_data = {
"path": test_case['setup_test'],
"prompt": setup_content
}
setup_result = await run_single_test_with_timeout(
computer=computer,
test_data=setup_test_data,
rp_client=rp_client,
launch_id=launch_id,
max_turns=max_turns,
jan_app_path=None,
jan_process_name="Jan.exe",
agent_config=agent_config,
enable_reportportal=enable_reportportal
)
success = setup_result.get("success", False) if setup_result else False
batch_result["setup_results"][test_case_key] = success
if success:
logger.info(f"✅ Setup {test_case_key}: SUCCESS")
else:
logger.error(f"❌ Setup {test_case_key}: FAILED")
setup_failures += 1
# Small delay between setups
time.sleep(3)
batch_result["setup_phase_success"] = setup_failures == 0
logger.info(f"Setup phase complete: {len(assistant_test_cases) - setup_failures}/{len(assistant_test_cases)} successful")
# PHASE 2: Upgrade to new version
logger.info("=" * 80)
logger.info("PHASE 2: UPGRADING TO NEW VERSION")
logger.info("=" * 80)
force_close_jan("Jan.exe")
force_close_jan("Jan-nightly.exe")
time.sleep(5)
install_jan_version(new_version_path, "new")
batch_result["upgrade_success"] = True
time.sleep(15) # Extra wait time after upgrade
# PHASE 3: Run BOTH verification tests on new version
logger.info("=" * 80)
logger.info("PHASE 3: BATCH VERIFICATION ON NEW VERSION")
logger.info("=" * 80)
verify_failures = 0
for i, test_case_key in enumerate(assistant_test_cases, 1):
test_case = MIGRATION_TEST_CASES[test_case_key]
logger.info(f"[{i}/{len(assistant_test_cases)}] Running verification: {test_case['name']}")
# Load and run verification test
verify_test_path = f"tests/migration/{test_case['verify_test']}"
if not os.path.exists(verify_test_path):
logger.error(f"Verification test file not found: {verify_test_path}")
batch_result["verify_results"][test_case_key] = False
verify_failures += 1
continue
with open(verify_test_path, "r") as f:
verify_content = f.read()
verify_test_data = {
"path": test_case['verify_test'],
"prompt": verify_content
}
verify_result = await run_single_test_with_timeout(
computer=computer,
test_data=verify_test_data,
rp_client=rp_client,
launch_id=launch_id,
max_turns=max_turns,
jan_app_path=None,
jan_process_name="Jan.exe",
agent_config=agent_config,
enable_reportportal=enable_reportportal
)
success = verify_result.get("success", False) if verify_result else False
batch_result["verify_results"][test_case_key] = success
if success:
logger.info(f"✅ Verify {test_case_key}: SUCCESS")
else:
logger.error(f"❌ Verify {test_case_key}: FAILED")
verify_failures += 1
# Small delay between verifications
time.sleep(3)
batch_result["verification_phase_success"] = verify_failures == 0
logger.info(f"Verification phase complete: {len(assistant_test_cases) - verify_failures}/{len(assistant_test_cases)} successful")
# Overall success calculation
batch_result["overall_success"] = (
batch_result["setup_phase_success"] and
batch_result["upgrade_success"] and
batch_result["verification_phase_success"]
)
# Final summary
logger.info("=" * 100)
logger.info("ASSISTANT BATCH MIGRATION TEST SUMMARY")
logger.info("=" * 100)
logger.info(f"Overall Success: {batch_result['overall_success']}")
logger.info(f"Setup Phase: {batch_result['setup_phase_success']} ({len(assistant_test_cases) - setup_failures}/{len(assistant_test_cases)})")
logger.info(f"Upgrade Phase: {batch_result['upgrade_success']}")
logger.info(f"Verification Phase: {batch_result['verification_phase_success']} ({len(assistant_test_cases) - verify_failures}/{len(assistant_test_cases)})")
logger.info("")
logger.info("Detailed Results:")
for test_case_key in assistant_test_cases:
            setup_status = "✅" if batch_result["setup_results"].get(test_case_key, False) else "❌"
            verify_status = "✅" if batch_result["verify_results"].get(test_case_key, False) else "❌"
logger.info(f" {test_case_key.ljust(20)}: Setup {setup_status} | Verify {verify_status}")
return batch_result
except Exception as e:
logger.error(f"Assistant batch migration test failed with exception: {e}")
batch_result["error_message"] = str(e)
return batch_result
finally:
# Cleanup
force_close_jan("Jan.exe")
force_close_jan("Jan-nightly.exe")
async def run_all_migration_tests(computer, old_version_path, new_version_path, rp_client=None,
launch_id=None, max_turns=30, agent_config=None, enable_reportportal=False,
test_cases=None):
"""
Run multiple migration test cases
Args:
test_cases: List of test case keys to run. If None, runs all test cases.
"""
if test_cases is None:
test_cases = list(MIGRATION_TEST_CASES.keys())
logger.info("=" * 100)
logger.info("RUNNING ALL MIGRATION TESTS")
logger.info("=" * 100)
logger.info(f"Test cases to run: {', '.join(test_cases)}")
results = {}
overall_success = True
for i, test_case_key in enumerate(test_cases, 1):
logger.info(f"\n[{i}/{len(test_cases)}] Starting migration test: {test_case_key}")
result = await run_individual_migration_test(
computer=computer,
test_case_key=test_case_key,
old_version_path=old_version_path,
new_version_path=new_version_path,
rp_client=rp_client,
launch_id=launch_id,
max_turns=max_turns,
agent_config=agent_config,
enable_reportportal=enable_reportportal
)
results[test_case_key] = result
if not result["overall_success"]:
overall_success = False
# Add delay between test cases
if i < len(test_cases):
logger.info("Waiting 30 seconds before next migration test...")
time.sleep(30)
# Final summary
logger.info("=" * 100)
logger.info("MIGRATION TESTS SUMMARY")
logger.info("=" * 100)
passed = sum(1 for r in results.values() if r["overall_success"])
failed = len(results) - passed
logger.info(f"Total tests: {len(results)}")
logger.info(f"Passed: {passed}")
logger.info(f"Failed: {failed}")
logger.info(f"Overall success: {overall_success}")
for test_case_key, result in results.items():
status = "PASS" if result["overall_success"] else "FAIL"
logger.info(f" {test_case_key}: {status}")
if result["error_message"]:
logger.info(f" Error: {result['error_message']}")
return {
"overall_success": overall_success,
"total_tests": len(results),
"passed": passed,
"failed": failed,
"results": results
}

autoqa/main.py
@ -12,6 +12,8 @@ from reportportal_client.helpers import timestamp
from utils import scan_test_files
from test_runner import run_single_test_with_timeout
from individual_migration_runner import run_individual_migration_test, run_all_migration_tests, MIGRATION_TEST_CASES
from reliability_runner import run_reliability_test, run_reliability_tests
# Configure logging
logging.basicConfig(
@ -183,8 +185,21 @@ Examples:
# Run with different model
python main.py --model-name "gpt-4" --model-base-url "https://api.openai.com/v1"
# Reliability testing - development phase (5 runs)
python main.py --enable-reliability-test --reliability-phase development
# Reliability testing - deployment phase (20 runs)
python main.py --enable-reliability-test --reliability-phase deployment
# Reliability testing - custom number of runs
python main.py --enable-reliability-test --reliability-runs 10
# Reliability testing - specific test file
python main.py --enable-reliability-test --reliability-test-path "tests/base/default-jan-assistant.txt"
# Using environment variables
ENABLE_REPORTPORTAL=true RP_TOKEN=xxx MODEL_NAME=gpt-4 python main.py
ENABLE_RELIABILITY_TEST=true RELIABILITY_PHASE=deployment python main.py
"""
)
@ -275,8 +290,8 @@ Examples:
)
test_group.add_argument(
'--tests-dir',
default=os.getenv('TESTS_DIR', 'tests'),
help='Directory containing test files (env: TESTS_DIR, default: %(default)s)'
default=os.getenv('TESTS_DIR', 'tests/base'),
help='Directory containing test files for current version testing (env: TESTS_DIR, default: %(default)s for base tests, tests/migration for migration tests)'
)
test_group.add_argument(
'--delay-between-tests',
@ -285,12 +300,99 @@ Examples:
help='Delay in seconds between tests (env: DELAY_BETWEEN_TESTS, default: %(default)s)'
)
# Migration testing arguments
migration_group = parser.add_argument_group('Migration Testing Configuration')
migration_group.add_argument(
'--enable-migration-test',
action='store_true',
default=os.getenv('ENABLE_MIGRATION_TEST', 'false').lower() == 'true',
help='Enable migration testing mode (env: ENABLE_MIGRATION_TEST, default: false)'
)
migration_group.add_argument(
'--old-version',
default=os.getenv('OLD_VERSION'),
help='Path to old version installer for migration testing (env: OLD_VERSION)'
)
migration_group.add_argument(
'--new-version',
default=os.getenv('NEW_VERSION'),
help='Path to new version installer for migration testing (env: NEW_VERSION)'
)
migration_group.add_argument(
'--migration-test-case',
default=os.getenv('MIGRATION_TEST_CASE'),
        help='Specific migration test case(s) to run. Can be a single case or a comma-separated list (e.g., "assistants" or "models,appearance_dark-theme"). Available cases: models, appearance_dark-theme, appearance_light-theme, appearance_font-size, assistants, modify-local-api-server, modify-https-proxy, models_disable-model-providers. If not specified, runs all test cases. Use --list-migration-tests to see all available cases. (env: MIGRATION_TEST_CASE)'
)
migration_group.add_argument(
'--migration-batch-mode',
action='store_true',
default=os.getenv('MIGRATION_BATCH_MODE', 'false').lower() == 'true',
help='Use batch migration mode: setup all → upgrade → verify all (env: MIGRATION_BATCH_MODE, default: false - uses individual mode)'
)
migration_group.add_argument(
'--list-migration-tests',
action='store_true',
help='List available migration test cases and exit'
)
# Reliability testing arguments
reliability_group = parser.add_argument_group('Reliability Testing Configuration')
reliability_group.add_argument(
'--enable-reliability-test',
action='store_true',
default=os.getenv('ENABLE_RELIABILITY_TEST', 'false').lower() == 'true',
help='Enable reliability testing mode (env: ENABLE_RELIABILITY_TEST, default: false)'
)
reliability_group.add_argument(
'--reliability-phase',
choices=['development', 'deployment'],
default=os.getenv('RELIABILITY_PHASE', 'development'),
help='Reliability testing phase: development (5 runs) or deployment (20 runs) (env: RELIABILITY_PHASE, default: development)'
)
reliability_group.add_argument(
'--reliability-runs',
type=int,
default=int(os.getenv('RELIABILITY_RUNS', '0')),
help='Custom number of runs for reliability testing (overrides phase setting) (env: RELIABILITY_RUNS, default: 0)'
)
reliability_group.add_argument(
'--reliability-test-path',
default=os.getenv('RELIABILITY_TEST_PATH'),
help='Specific test file path for reliability testing (env: RELIABILITY_TEST_PATH, if not specified, uses --tests-dir)'
)
args = parser.parse_args()
# Handle list migration tests
if args.list_migration_tests:
print("Available migration test cases:")
print("=" * 50)
for key, test_case in MIGRATION_TEST_CASES.items():
print(f" {key}:")
print(f" Name: {test_case['name']}")
print(f" Description: {test_case['description']}")
print()
exit(0)
# Validate ReportPortal token if ReportPortal is enabled
if args.enable_reportportal and not args.rp_token:
parser.error("--rp-token (or RP_TOKEN env var) is required when --enable-reportportal is used")
# Validate migration test arguments
if args.enable_migration_test:
if not args.old_version:
parser.error("--old-version (or OLD_VERSION env var) is required when --enable-migration-test is used")
if not args.new_version:
parser.error("--new-version (or NEW_VERSION env var) is required when --enable-migration-test is used")
if not os.path.exists(args.old_version):
parser.error(f"Old version installer not found: {args.old_version}")
if not os.path.exists(args.new_version):
parser.error(f"New version installer not found: {args.new_version}")
        # Validate specific test case(s) if provided (supports comma-separated lists)
        if args.migration_test_case:
            for case in (c.strip() for c in args.migration_test_case.split(',')):
                if case not in MIGRATION_TEST_CASES:
                    parser.error(f"Unknown migration test case: {case}. Use --list-migration-tests to see available test cases.")
return args
async def main():
@ -324,6 +426,7 @@ async def main():
# Log configuration
logger.info("=== Configuration ===")
logger.info(f"Testing Mode: {'MIGRATION (old → new version)' if args.enable_migration_test else 'BASE (current version)'}")
logger.info(f"Computer server: {'STARTED' if server_thread else 'EXTERNAL'}")
logger.info(f"Tests directory: {args.tests_dir}")
logger.info(f"Max turns per test: {args.max_turns}")
@ -340,20 +443,23 @@ async def main():
logger.info(f"ReportPortal project: {args.rp_project}")
logger.info(f"ReportPortal token: {'SET' if args.rp_token else 'NOT SET'}")
logger.info(f"Launch name: {args.launch_name if args.launch_name else 'AUTO-GENERATED'}")
logger.info(f"Migration testing: {'ENABLED' if args.enable_migration_test else 'DISABLED'}")
if args.enable_migration_test:
logger.info(f"Old version installer: {args.old_version}")
logger.info(f"New version installer: {args.new_version}")
logger.info(f"Reliability testing: {'ENABLED' if args.enable_reliability_test else 'DISABLED'}")
if args.enable_reliability_test:
logger.info(f"Reliability phase: {args.reliability_phase}")
if args.reliability_runs > 0:
logger.info(f"Custom runs: {args.reliability_runs}")
else:
logger.info(f"Phase runs: {5 if args.reliability_phase == 'development' else 20}")
if args.reliability_test_path:
logger.info(f"Specific test path: {args.reliability_test_path}")
else:
logger.info(f"Tests directory: {args.tests_dir}")
logger.info("======================")
# Scan all test files
test_files = scan_test_files(args.tests_dir)
if not test_files:
logger.warning(f"No test files found in directory: {args.tests_dir}")
return
logger.info(f"Found {len(test_files)} test files")
# Track test results for final exit code
test_results = {"passed": 0, "failed": 0, "total": len(test_files)}
# Initialize ReportPortal client only if enabled
rp_client = None
launch_id = None
@ -408,80 +514,286 @@ async def main():
await computer.run()
logger.info("Computer environment ready")
# Run each test sequentially with turn monitoring
for i, test_data in enumerate(test_files, 1):
logger.info(f"Running test {i}/{len(test_files)}: {test_data['path']}")
# Check if reliability testing is enabled
if args.enable_reliability_test:
logger.info("=" * 60)
logger.info("RELIABILITY TESTING MODE ENABLED")
logger.info("=" * 60)
logger.info(f"Phase: {args.reliability_phase}")
if args.reliability_runs > 0:
logger.info(f"Custom runs: {args.reliability_runs}")
else:
logger.info(f"Phase runs: {5 if args.reliability_phase == 'development' else 20}")
try:
# Pass all configs to test runner
test_result = await run_single_test_with_timeout(
computer=computer,
test_data=test_data,
rp_client=rp_client, # Can be None
launch_id=launch_id, # Can be None
max_turns=args.max_turns,
jan_app_path=args.jan_app_path,
jan_process_name=args.jan_process_name,
agent_config=agent_config,
# Determine test paths for reliability testing
if args.reliability_test_path:
# Use specific test path
if not os.path.exists(args.reliability_test_path):
logger.error(f"Reliability test file not found: {args.reliability_test_path}")
final_exit_code = 1
return final_exit_code
test_paths = [args.reliability_test_path]
logger.info(f"Running reliability test on specific file: {args.reliability_test_path}")
else:
# Use tests directory
test_files = scan_test_files(args.tests_dir)
if not test_files:
logger.warning(f"No test files found in directory: {args.tests_dir}")
return
test_paths = [test_data['path'] for test_data in test_files]
logger.info(f"Running reliability tests on {len(test_paths)} test files from: {args.tests_dir}")
# Run reliability tests
reliability_results = await run_reliability_tests(
computer=computer,
test_paths=test_paths,
rp_client=rp_client,
launch_id=launch_id,
max_turns=args.max_turns,
jan_app_path=args.jan_app_path,
jan_process_name=args.jan_process_name,
agent_config=agent_config,
enable_reportportal=args.enable_reportportal,
phase=args.reliability_phase,
runs=args.reliability_runs if args.reliability_runs > 0 else None
)
# Handle reliability test results
if reliability_results and reliability_results.get("overall_success", False):
logger.info(f"[SUCCESS] Reliability testing completed successfully!")
final_exit_code = 0
else:
logger.error(f"[FAILED] Reliability testing failed!")
if reliability_results and reliability_results.get("error_message"):
logger.error(f"Error: {reliability_results['error_message']}")
final_exit_code = 1
# Skip regular test execution in reliability mode
logger.info("Reliability testing completed. Skipping regular test execution.")
# Check if migration testing is enabled
elif args.enable_migration_test:
logger.info("=" * 60)
logger.info("MIGRATION TESTING MODE ENABLED")
logger.info("=" * 60)
logger.info(f"Old version installer: {args.old_version}")
logger.info(f"New version installer: {args.new_version}")
logger.info(f"Migration mode: {'BATCH (all setups → upgrade → all verifies)' if args.migration_batch_mode else 'INDIVIDUAL (setup → upgrade → verify per test)'}")
if args.migration_test_case:
# Parse comma-separated test cases
test_cases = [case.strip() for case in args.migration_test_case.split(',')]
logger.info(f"Running specific test case(s): {', '.join(test_cases)}")
# Validate all test cases exist
for test_case in test_cases:
if test_case not in MIGRATION_TEST_CASES:
logger.error(f"Unknown test case: {test_case}")
logger.error(f"Available test cases: {', '.join(MIGRATION_TEST_CASES.keys())}")
final_exit_code = 1
return final_exit_code
if args.migration_batch_mode:
# Import and run batch migration with specified test cases
from batch_migration_runner import run_batch_migration_test
migration_results = await run_batch_migration_test(
computer=computer,
old_version_path=args.old_version,
new_version_path=args.new_version,
rp_client=rp_client,
launch_id=launch_id,
max_turns=args.max_turns,
agent_config=agent_config,
enable_reportportal=args.enable_reportportal,
test_cases=test_cases # Multiple test cases in batch mode
)
# Handle batch test result
if migration_results and migration_results.get("overall_success", False):
logger.info(f"[SUCCESS] Batch migration test '{', '.join(test_cases)}' completed successfully!")
final_exit_code = 0
else:
logger.error(f"[FAILED] Batch migration test '{', '.join(test_cases)}' failed!")
if migration_results and migration_results.get("error_message"):
logger.error(f"Error: {migration_results['error_message']}")
final_exit_code = 1
else:
# Run individual migration tests for each specified test case
all_individual_results = []
overall_individual_success = True
for test_case in test_cases:
logger.info(f"Running individual migration test for: {test_case}")
migration_result = await run_individual_migration_test(
computer=computer,
test_case_key=test_case,
old_version_path=args.old_version,
new_version_path=args.new_version,
rp_client=rp_client,
launch_id=launch_id,
max_turns=args.max_turns,
agent_config=agent_config,
enable_reportportal=args.enable_reportportal
)
all_individual_results.append(migration_result)
if not (migration_result and migration_result.get("overall_success", False)):
overall_individual_success = False
# Handle individual test results
if overall_individual_success:
logger.info(f"[SUCCESS] All individual migration tests '{', '.join(test_cases)}' completed successfully!")
final_exit_code = 0
else:
logger.error(f"[FAILED] One or more individual migration tests '{', '.join(test_cases)}' failed!")
for i, result in enumerate(all_individual_results):
if result and result.get("error_message"):
logger.error(f"Error in {test_cases[i]}: {result['error_message']}")
final_exit_code = 1
else:
logger.info("Running all migration test cases")
if args.migration_batch_mode:
# Import and run batch migration runner
from batch_migration_runner import run_batch_migration_test
migration_results = await run_batch_migration_test(
computer=computer,
old_version_path=args.old_version,
new_version_path=args.new_version,
rp_client=rp_client,
launch_id=launch_id,
max_turns=args.max_turns,
agent_config=agent_config,
enable_reportportal=args.enable_reportportal
)
else:
# Run all migration tests individually
migration_results = await run_all_migration_tests(
computer=computer,
old_version_path=args.old_version,
new_version_path=args.new_version,
rp_client=rp_client,
launch_id=launch_id,
max_turns=args.max_turns,
agent_config=agent_config,
enable_reportportal=args.enable_reportportal
)
# Track test result - properly handle different return formats
test_passed = False
if test_result:
# Check different possible return formats
if isinstance(test_result, dict):
# Dictionary format: check 'success' key
test_passed = test_result.get('success', False)
elif isinstance(test_result, bool):
# Boolean format: direct boolean value
test_passed = test_result
elif hasattr(test_result, 'success'):
# Object format: check success attribute
test_passed = getattr(test_result, 'success', False)
else:
# Any truthy value is considered success
test_passed = bool(test_result)
# Handle overall results
if migration_results and migration_results.get("overall_success", False):
logger.info("[SUCCESS] All migration tests completed successfully!")
final_exit_code = 0
else:
test_passed = False
# Update counters and log result
if test_passed:
test_results["passed"] += 1
logger.info(f"[SUCCESS] Test {i} PASSED: {test_data['path']}")
else:
test_results["failed"] += 1
logger.error(f"[FAILED] Test {i} FAILED: {test_data['path']}")
# Debug log for troubleshooting
logger.info(f"[INFO] Debug - Test result: type={type(test_result)}, value={test_result}, success_field={test_result.get('success', 'N/A') if isinstance(test_result, dict) else 'N/A'}, final_passed={test_passed}")
except Exception as e:
test_results["failed"] += 1
logger.error(f"[FAILED] Test {i} FAILED with exception: {test_data['path']} - {e}")
logger.error("[FAILED] One or more migration tests failed!")
if migration_results:
logger.error(f"Failed {migration_results.get('failed', 0)} out of {migration_results.get('total_tests', 0)} tests")
final_exit_code = 1
# Skip regular test execution in migration mode
logger.info("Migration testing completed. Skipping regular test execution.")
# Add delay between tests
if i < len(test_files):
logger.info(f"Waiting {args.delay_between_tests} seconds before next test...")
await asyncio.sleep(args.delay_between_tests)
# Log final test results summary
logger.info("=" * 50)
logger.info("TEST EXECUTION SUMMARY")
logger.info("=" * 50)
logger.info(f"Total tests: {test_results['total']}")
logger.info(f"Passed: {test_results['passed']}")
logger.info(f"Failed: {test_results['failed']}")
logger.info(f"Success rate: {(test_results['passed']/test_results['total']*100):.1f}%")
logger.info("=" * 50)
if test_results["failed"] > 0:
logger.error(f"[FAILED] Test execution completed with {test_results['failed']} failures!")
final_exit_code = 1
else:
logger.info("[SUCCESS] All tests completed successfully!")
final_exit_code = 0
# Regular test execution mode (base/current version testing)
logger.info("Running base test execution mode (current version testing)")
# Use base tests directory if default tests_dir is being used
base_tests_dir = args.tests_dir
if args.tests_dir == 'tests/base' and not os.path.exists(args.tests_dir):
# Fallback to old structure if base directory doesn't exist
if os.path.exists('tests'):
base_tests_dir = 'tests'
logger.warning("tests/base directory not found, using 'tests' as fallback")
logger.info(f"Using test directory: {base_tests_dir}")
# Scan all test files
test_files = scan_test_files(base_tests_dir)
if not test_files:
logger.warning(f"No test files found in directory: {base_tests_dir}")
return
logger.info(f"Found {len(test_files)} test files")
# Track test results for final exit code
test_results = {"passed": 0, "failed": 0, "total": len(test_files)}
# Run each test sequentially with turn monitoring
for i, test_data in enumerate(test_files, 1):
logger.info(f"Running test {i}/{len(test_files)}: {test_data['path']}")
try:
# Pass all configs to test runner
test_result = await run_single_test_with_timeout(
computer=computer,
test_data=test_data,
rp_client=rp_client, # Can be None
launch_id=launch_id, # Can be None
max_turns=args.max_turns,
jan_app_path=args.jan_app_path,
jan_process_name=args.jan_process_name,
agent_config=agent_config,
enable_reportportal=args.enable_reportportal
)
# Track test result - properly handle different return formats
test_passed = False
if test_result:
# Check different possible return formats
if isinstance(test_result, dict):
# Dictionary format: check 'success' key
test_passed = test_result.get('success', False)
elif isinstance(test_result, bool):
# Boolean format: direct boolean value
test_passed = test_result
elif hasattr(test_result, 'success'):
# Object format: check success attribute
test_passed = getattr(test_result, 'success', False)
else:
# Any truthy value is considered success
test_passed = bool(test_result)
else:
test_passed = False
# Update counters and log result
if test_passed:
test_results["passed"] += 1
logger.info(f"[SUCCESS] Test {i} PASSED: {test_data['path']}")
else:
test_results["failed"] += 1
logger.error(f"[FAILED] Test {i} FAILED: {test_data['path']}")
# Debug log for troubleshooting
logger.info(f"[INFO] Debug - Test result: type={type(test_result)}, value={test_result}, success_field={test_result.get('success', 'N/A') if isinstance(test_result, dict) else 'N/A'}, final_passed={test_passed}")
except Exception as e:
test_results["failed"] += 1
logger.error(f"[FAILED] Test {i} FAILED with exception: {test_data['path']} - {e}")
# Add delay between tests
if i < len(test_files):
logger.info(f"Waiting {args.delay_between_tests} seconds before next test...")
await asyncio.sleep(args.delay_between_tests)
# Log final test results summary
logger.info("=" * 50)
logger.info("TEST EXECUTION SUMMARY")
logger.info("=" * 50)
logger.info(f"Total tests: {test_results['total']}")
logger.info(f"Passed: {test_results['passed']}")
logger.info(f"Failed: {test_results['failed']}")
logger.info(f"Success rate: {(test_results['passed']/test_results['total']*100):.1f}%")
logger.info("=" * 50)
if test_results["failed"] > 0:
logger.error(f"[FAILED] Test execution completed with {test_results['failed']} failures!")
final_exit_code = 1
else:
logger.info("[SUCCESS] All tests completed successfully!")
final_exit_code = 0
except KeyboardInterrupt:
logger.info("Test execution interrupted by user")

autoqa/migration_utils.py Normal file
@ -0,0 +1,67 @@
import os
import logging
import subprocess
import time
from pathlib import Path
from utils import force_close_jan, is_jan_running, start_jan_app
logger = logging.getLogger(__name__)
def install_jan_version(installer_path, version_type="old"):
"""
Install a specific version of Jan
Args:
installer_path: Path to the installer file
version_type: "old" or "new" for logging purposes
"""
logger.info(f"Installing Jan {version_type} version from: {installer_path}")
# Force close any running Jan instances first
force_close_jan("Jan.exe")
force_close_jan("Jan-nightly.exe")
try:
if installer_path.endswith('.exe'):
# Windows installer
subprocess.run([installer_path, '/S'], check=True)
elif installer_path.endswith('.deb'):
# Ubuntu installer
subprocess.run(['sudo', 'dpkg', '-i', installer_path], check=True)
elif installer_path.endswith('.dmg'):
# macOS installer - need to mount and copy
subprocess.run(['hdiutil', 'attach', installer_path], check=True)
# This is simplified - actual implementation would need to handle dmg mounting
logger.info(f"Successfully installed Jan {version_type} version")
# Wait for installation to complete
time.sleep(30)
except subprocess.CalledProcessError as e:
logger.error(f"Failed to install Jan {version_type} version: {e}")
raise
except Exception as e:
logger.error(f"Unexpected error installing Jan {version_type} version: {e}")
raise
# Backup functionality removed as it's redundant for direct persistence testing
def prepare_migration_environment():
"""
Prepare environment for migration testing
"""
logger.info("Preparing migration test environment...")
# Create migration logs directory
migration_logs_dir = "migration_logs"
os.makedirs(migration_logs_dir, exist_ok=True)
# Create migration artifacts directory
migration_artifacts_dir = "migration_artifacts"
os.makedirs(migration_artifacts_dir, exist_ok=True)
return {
"logs_dir": migration_logs_dir,
"artifacts_dir": migration_artifacts_dir
}

autoqa/reliability_runner.py Normal file
@ -0,0 +1,334 @@
import asyncio
import logging
import os
import time
from datetime import datetime
from pathlib import Path
from test_runner import run_single_test_with_timeout
from utils import scan_test_files
logger = logging.getLogger(__name__)
async def run_reliability_test(computer, test_path, rp_client=None, launch_id=None,
max_turns=30, jan_app_path=None, jan_process_name="Jan.exe",
agent_config=None, enable_reportportal=False,
                               phase="development", runs=None):
"""
Run a single test case multiple times to verify reliability and stability
Args:
computer: Computer agent instance
test_path: Path to the test file to run
rp_client: ReportPortal client (optional)
launch_id: ReportPortal launch ID (optional)
max_turns: Maximum turns per test
jan_app_path: Path to Jan application
jan_process_name: Jan process name for monitoring
agent_config: Agent configuration
enable_reportportal: Whether to upload to ReportPortal
phase: "development" (5 runs) or "deployment" (20 runs)
runs: Number of runs to execute (overrides phase if specified)
Returns:
dict with reliability test results
"""
    # Determine number of runs: an explicit runs value overrides the phase defaults
    if runs:
        target_runs = runs
    elif phase == "deployment":
        target_runs = 20
    else:
        target_runs = 5
logger.info("=" * 100)
logger.info(f"RELIABILITY TESTING: {test_path.upper()}")
logger.info("=" * 100)
logger.info(f"Phase: {phase.upper()}")
logger.info(f"Target runs: {target_runs}")
logger.info(f"Test file: {test_path}")
logger.info("")
# Load test content
if not os.path.exists(test_path):
raise FileNotFoundError(f"Test file not found: {test_path}")
with open(test_path, "r", encoding="utf-8") as f:
test_content = f.read()
test_data = {
"path": test_path,
"prompt": test_content
}
# Initialize results tracking
reliability_results = {
"test_path": test_path,
"phase": phase,
"target_runs": target_runs,
"completed_runs": 0,
"successful_runs": 0,
"failed_runs": 0,
"run_details": [],
"start_time": datetime.now(),
"end_time": None,
"success_rate": 0.0,
"overall_success": False
}
logger.info(f"Starting reliability testing with {target_runs} runs...")
logger.info("=" * 80)
try:
for run_number in range(1, target_runs + 1):
logger.info(f"Run {run_number}/{target_runs}")
logger.info("-" * 40)
run_start_time = datetime.now()
try:
# Run the test
test_result = await run_single_test_with_timeout(
computer=computer,
test_data=test_data,
rp_client=rp_client,
launch_id=launch_id,
max_turns=max_turns,
jan_app_path=jan_app_path,
jan_process_name=jan_process_name,
agent_config=agent_config,
enable_reportportal=enable_reportportal
)
# Extract success status
success = False
if test_result:
if isinstance(test_result, dict):
success = test_result.get('success', False)
elif isinstance(test_result, bool):
success = test_result
elif hasattr(test_result, 'success'):
success = getattr(test_result, 'success', False)
else:
success = bool(test_result)
run_end_time = datetime.now()
run_duration = (run_end_time - run_start_time).total_seconds()
# Record run result
run_result = {
"run_number": run_number,
"success": success,
"start_time": run_start_time,
"end_time": run_end_time,
"duration_seconds": run_duration,
"test_result": test_result
}
reliability_results["run_details"].append(run_result)
reliability_results["completed_runs"] += 1
if success:
reliability_results["successful_runs"] += 1
logger.info(f"✅ Run {run_number}: SUCCESS ({run_duration:.1f}s)")
else:
reliability_results["failed_runs"] += 1
logger.error(f"❌ Run {run_number}: FAILED ({run_duration:.1f}s)")
# Calculate current success rate
current_success_rate = (reliability_results["successful_runs"] / reliability_results["completed_runs"]) * 100
logger.info(f"Current success rate: {reliability_results['successful_runs']}/{reliability_results['completed_runs']} ({current_success_rate:.1f}%)")
except Exception as e:
run_end_time = datetime.now()
run_duration = (run_end_time - run_start_time).total_seconds()
# Record failed run
run_result = {
"run_number": run_number,
"success": False,
"start_time": run_start_time,
"end_time": run_end_time,
"duration_seconds": run_duration,
"error": str(e)
}
reliability_results["run_details"].append(run_result)
reliability_results["completed_runs"] += 1
reliability_results["failed_runs"] += 1
logger.error(f"❌ Run {run_number}: EXCEPTION ({run_duration:.1f}s) - {e}")
# Calculate current success rate
current_success_rate = (reliability_results["successful_runs"] / reliability_results["completed_runs"]) * 100
logger.info(f"Current success rate: {reliability_results['successful_runs']}/{reliability_results['completed_runs']} ({current_success_rate:.1f}%)")
# Add delay between runs (except for the last run)
if run_number < target_runs:
delay_seconds = 5
logger.info(f"Waiting {delay_seconds} seconds before next run...")
await asyncio.sleep(delay_seconds)
# Final calculations
reliability_results["end_time"] = datetime.now()
total_duration = (reliability_results["end_time"] - reliability_results["start_time"]).total_seconds()
reliability_results["total_duration_seconds"] = total_duration
if reliability_results["completed_runs"] > 0:
reliability_results["success_rate"] = (reliability_results["successful_runs"] / reliability_results["completed_runs"]) * 100
# Determine overall success based on phase
if phase == "development":
# Development phase: 80% success rate required
reliability_results["overall_success"] = reliability_results["success_rate"] >= 80.0
else:
# Deployment phase: 90% success rate required
reliability_results["overall_success"] = reliability_results["success_rate"] >= 90.0
# Print final summary
logger.info("=" * 80)
logger.info("RELIABILITY TEST SUMMARY")
logger.info("=" * 80)
logger.info(f"Test: {test_path}")
logger.info(f"Phase: {phase.upper()}")
logger.info(f"Completed runs: {reliability_results['completed_runs']}/{target_runs}")
logger.info(f"Successful runs: {reliability_results['successful_runs']}")
logger.info(f"Failed runs: {reliability_results['failed_runs']}")
logger.info(f"Success rate: {reliability_results['success_rate']:.1f}%")
logger.info(f"Total duration: {total_duration:.1f} seconds")
logger.info(f"Average duration per run: {total_duration / reliability_results['completed_runs']:.1f} seconds")
logger.info(f"Overall result: {'✅ PASSED' if reliability_results['overall_success'] else '❌ FAILED'}")
# Phase-specific requirements
if phase == "development":
logger.info("Development phase requirement: ≥80% success rate")
else:
logger.info("Deployment phase requirement: ≥90% success rate")
return reliability_results
except Exception as e:
logger.error(f"Reliability testing failed with exception: {e}")
reliability_results["end_time"] = datetime.now()
reliability_results["error_message"] = str(e)
return reliability_results
async def run_reliability_tests(computer, test_paths, rp_client=None, launch_id=None,
max_turns=30, jan_app_path=None, jan_process_name="Jan.exe",
agent_config=None, enable_reportportal=False,
phase="development", runs=None):
"""
Run reliability tests for multiple test files
Args:
computer: Computer agent instance
test_paths: List of test file paths or single path
rp_client: ReportPortal client (optional)
launch_id: ReportPortal launch ID (optional)
max_turns: Maximum turns per test
jan_app_path: Path to Jan application
jan_process_name: Jan process name for monitoring
agent_config: Agent configuration
enable_reportportal: Whether to upload to ReportPortal
phase: "development" (5 runs) or "deployment" (20 runs)
runs: Number of runs to execute (used only when phase is neither "development" nor "deployment")
Returns:
dict with overall reliability test results
"""
# Convert single path to list
if isinstance(test_paths, str):
test_paths = [test_paths]
logger.info("=" * 100)
logger.info("RELIABILITY TESTING SUITE")
logger.info("=" * 100)
logger.info(f"Phase: {phase.upper()}")
logger.info(f"Test files: {len(test_paths)}")
logger.info(f"Test paths: {', '.join(test_paths)}")
logger.info("")
overall_results = {
"phase": phase,
"total_tests": len(test_paths),
"completed_tests": 0,
"passed_tests": 0,
"failed_tests": 0,
"test_results": {},
"start_time": datetime.now(),
"end_time": None,
"overall_success": False
}
try:
for i, test_path in enumerate(test_paths, 1):
logger.info(f"Starting reliability test {i}/{len(test_paths)}: {test_path}")
test_result = await run_reliability_test(
computer=computer,
test_path=test_path,
rp_client=rp_client,
launch_id=launch_id,
max_turns=max_turns,
jan_app_path=jan_app_path,
jan_process_name=jan_process_name,
agent_config=agent_config,
enable_reportportal=enable_reportportal,
phase=phase,
runs=runs
)
overall_results["test_results"][test_path] = test_result
overall_results["completed_tests"] += 1
if test_result and test_result.get("overall_success", False):
overall_results["passed_tests"] += 1
logger.info(f"✅ Test {i} PASSED: {test_path}")
else:
overall_results["failed_tests"] += 1
logger.error(f"❌ Test {i} FAILED: {test_path}")
# Add delay between tests (except for the last test)
if i < len(test_paths):
delay_seconds = 10
logger.info(f"Waiting {delay_seconds} seconds before next test...")
await asyncio.sleep(delay_seconds)
# Final calculations
overall_results["end_time"] = datetime.now()
total_duration = (overall_results["end_time"] - overall_results["start_time"]).total_seconds()
overall_results["total_duration_seconds"] = total_duration
if overall_results["completed_tests"] > 0:
overall_results["overall_success"] = overall_results["failed_tests"] == 0
# Print overall summary
logger.info("=" * 100)
logger.info("RELIABILITY TESTING SUITE SUMMARY")
logger.info("=" * 100)
logger.info(f"Phase: {phase.upper()}")
logger.info(f"Total tests: {overall_results['total_tests']}")
logger.info(f"Completed tests: {overall_results['completed_tests']}")
logger.info(f"Passed tests: {overall_results['passed_tests']}")
logger.info(f"Failed tests: {overall_results['failed_tests']}")
logger.info(f"Total duration: {total_duration:.1f} seconds")
logger.info(f"Overall result: {'✅ PASSED' if overall_results['overall_success'] else '❌ FAILED'}")
# Individual test results
logger.info("")
logger.info("Individual Test Results:")
for test_path, test_result in overall_results["test_results"].items():
if test_result:
status = "✅ PASSED" if test_result.get("overall_success", False) else "❌ FAILED"
success_rate = test_result.get("success_rate", 0.0)
logger.info(f" {test_path}: {status} ({success_rate:.1f}% success rate)")
else:
logger.info(f" {test_path}: ❌ ERROR (no result)")
return overall_results
except Exception as e:
logger.error(f"Reliability testing suite failed with exception: {e}")
overall_results["end_time"] = datetime.now()
overall_results["error_message"] = str(e)
return overall_results
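For orientation, a minimal driver showing how the suite above might be invoked; the `create_computer()` factory and the test path are placeholders for illustration, not APIs defined in this repository:

```python
import asyncio

async def main():
    computer = await create_computer()  # placeholder for however the Computer agent is constructed
    results = await run_reliability_tests(
        computer=computer,
        test_paths=["tests/test_default_assistant.py"],  # hypothetical test file
        phase="development",                             # 5 runs, >=80% success required
        jan_process_name="Jan.exe",
    )
    raise SystemExit(0 if results["overall_success"] else 1)

if __name__ == "__main__":
    asyncio.run(main())
```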

View File

@ -114,46 +114,103 @@ def extract_test_result_from_trajectory(trajectory_dir):
logger.info(f"Checking result in last turn: {last_turn}")
# Look for API call response files
# Look for agent response files first (preferred), then fall back to response files
agent_response_files = [f for f in os.listdir(last_turn_path)
if f.startswith("api_call_") and f.endswith("_agent_response.json")]
response_files = [f for f in os.listdir(last_turn_path)
if f.startswith("api_call_") and f.endswith("_response.json")]
if not response_files:
# Prefer agent_response files, but fall back to response files if needed
if agent_response_files:
target_files = agent_response_files
file_type = "agent_response"
elif response_files:
target_files = response_files
file_type = "response"
else:
logger.warning("No API response files found in last turn")
return False
# Check the last response file
last_response_file = sorted(response_files)[-1]
last_response_file = sorted(target_files)[-1]
response_file_path = os.path.join(last_turn_path, last_response_file)
logger.info(f"Checking response file: {last_response_file}")
logger.info(f"Checking {file_type} file: {last_response_file}")
with open(response_file_path, 'r', encoding='utf-8') as f:
data = json.load(f)
# Extract content from response
if 'response' in data and 'choices' in data['response'] and data['response']['choices']:
last_choice = data['response']['choices'][-1]
if 'message' in last_choice and 'content' in last_choice['message']:
content = last_choice['message']['content']
logger.info(f"Last response content: {content}")
# Look for result patterns - need to check both True and False
true_pattern = r'\{\s*"result"\s*:\s*True\s*\}'
false_pattern = r'\{\s*"result"\s*:\s*False\s*\}'
true_match = re.search(true_pattern, content)
false_match = re.search(false_pattern, content)
if true_match:
logger.info(f"Found test result: True - PASSED")
return True
elif false_match:
logger.info(f"Found test result: False - FAILED")
return False
else:
logger.warning("No valid result pattern found in response content - marking as FAILED")
return False
# Extract content from response - handle both agent_response and response formats
content = None
if file_type == "agent_response":
logger.info(f"Processing agent_response file with keys: {list(data.keys())}")
# For agent_response.json: look in multiple possible locations
if 'response' in data and 'choices' in data['response'] and data['response']['choices']:
last_choice = data['response']['choices'][-1]
if 'message' in last_choice and 'content' in last_choice['message']:
content = last_choice['message']['content']
logger.info(f"Found content in response.choices[].message.content: {content}")
# Also check in output array for message content - handle both direct and nested structures
output_array = None
if 'output' in data:
output_array = data['output']
logger.info(f"Found output array directly in data with {len(output_array)} items")
elif 'response' in data and isinstance(data['response'], dict) and 'output' in data['response']:
output_array = data['response']['output']
logger.info(f"Found output array in nested response with {len(output_array)} items")
if not content and output_array:
for i, output_item in enumerate(output_array):
logger.info(f"Output item {i}: type={output_item.get('type')}")
if output_item.get('type') == 'message':
message_content = output_item.get('content', [])
logger.info(f"Found message with {len(message_content)} content items")
for j, content_item in enumerate(message_content):
logger.info(f"Content item {j}: type={content_item.get('type')}, text={content_item.get('text', '')}")
if content_item.get('type') == 'output_text':
potential_content = content_item.get('text', '')
if 'result' in potential_content:
content = potential_content
logger.info(f"Found result content: {content}")
break
if content:
break
if not content and not output_array:
logger.warning(f"No 'output' key found in data or nested response. Available keys: {list(data.keys())}")
if 'response' in data:
logger.warning(f"Response keys: {list(data['response'].keys()) if isinstance(data['response'], dict) else 'Not a dict'}")
else:
# For response.json: look in choices[0].message.content
if 'response' in data and 'choices' in data['response'] and data['response']['choices']:
last_choice = data['response']['choices'][-1]
if 'message' in last_choice and 'content' in last_choice['message']:
content = last_choice['message']['content']
if content:
logger.info(f"Last {file_type} content: {content}")
# Look for result patterns - need to check both True and False
# Updated patterns to handle additional JSON fields and both Python and JSON boolean values
true_pattern = r'\{\s*"result"\s*:\s*(true|True)\s*[,}]'
false_pattern = r'\{\s*"result"\s*:\s*(false|False)\s*[,}]'
true_match = re.search(true_pattern, content)
false_match = re.search(false_pattern, content)
if true_match:
logger.info(f"Found test result: True - PASSED")
return True
elif false_match:
logger.info(f"Found test result: False - FAILED")
return False
else:
logger.warning("No valid result pattern found in response content - marking as FAILED")
return False
else:
logger.warning(f"Could not extract content from {file_type} structure")
logger.warning("Could not extract content from response structure")
return False
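The relaxed patterns above accept both JSON-style and Python-style booleans and tolerate extra fields before the closing brace; a quick self-contained check:

```python
import re

true_pattern = r'\{\s*"result"\s*:\s*(true|True)\s*[,}]'
false_pattern = r'\{\s*"result"\s*:\s*(false|False)\s*[,}]'

samples = [
    '{"result": true}',
    '{"result": True}',
    '{"result": true, "notes": "all steps passed"}',
    '{"result": false}',
]
for s in samples:
    # Each sample should match exactly one of the two patterns.
    print(s, "->", "PASSED" if re.search(true_pattern, s) else
                   "FAILED" if re.search(false_pattern, s) else "NO MATCH")
```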

View File

@ -0,0 +1,72 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait()
finished()
call_user()
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
- If a dialog appears (e.g., "Help Us Improve Jan" or "New Version/Update"), dismiss it before proceeding.
- The **Add Assistant** dialog has its own vertical scrollbar; if controls are not visible, click inside the dialog to focus it, then scroll or drag the dialog's scrollbar handle.
## User Instruction
Verify that the default predefined parameters for a new assistant are correct, keys are lower snake_case, and that saving/reopening preserves the values.
### Steps
1. Open **Assistants** from the bottom-left menu.
2. Click **Create Assistant** to open the **Add Assistant** dialog.
3. In **Name**, type: `Param Tester`
4. In **Description**, type: `For parameter editing verification.`
5. In **Instructions**, type: `Test assistant for changing predefined parameters.`
6. In **Predefined Parameters**, click each chip so it appears in the **Parameters** list (scroll within the dialog if needed):
- Stream
- Temperature
- Frequency Penalty
- Presence Penalty
- Top P
- Top K
7. Verify the **default values** shown after toggling the chips match exactly:
- Stream: **True** (Boolean)
- Temperature: **0.7**
- Frequency Penalty: **0.7**
- Presence Penalty: **0.7**
- Top P: **0.95**
- Top K: **2**
8. Click **Save**.
9. In the Assistants list, locate **Param Tester** (scroll the list if necessary) and click its **Edit** (pencil) icon.
10. Verify the assistant's **Name**, **Description**, **Instructions**, and all **Parameters** are present and unchanged (scroll within the dialog if needed).
11. Click **×** to close the dialog.
## Pass/Fail Output (strict)
- Respond in English only.
- Return ONLY one of the following JSON objects, with no extra text.
If all parameters are visible, default values match exactly, and the saved assistant reopens with the same values and texts, return:
{"result": true}
Otherwise, return:
{"result": false}
IMPORTANT:
- Your response must be ONLY the JSON above.
- Do NOT add any other text before or after the JSON.
"""

View File

@ -0,0 +1,52 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are going to test the Jan application by verifying that a default assistant named **Jan** is present.
Step-by-step instructions:
0. Given the Jan application is already open.
1. If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing. This ensures full visibility of the interface.
2. In the bottom-left menu, click on **Assistants**.
3. On the Assistants screen, verify that there is a visible assistant card named **Jan**.
4. Confirm that it has a description under the name that starts with:
"Jan is a helpful desktop assistant..."
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
If the assistant named Jan is present and its description is visible, return:
{"result": true}
Otherwise, return:
{"result": false}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,78 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```
Thought: ...
Action: ...
```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use "\
" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are disabling specific Model Providers in the Jan application to verify they are hidden from the sidebar list.
TARGET PROVIDERS TO DISABLE: Llama.cpp, OpenAI, Anthropic
Step-by-step instructions:
1. Open the Jan application.
- If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
- If **New Version Available** popup appears on app launch (older versions), click **Remind Me Later** to dismiss it before continuing.
2. Navigate to Model Providers settings:
- In the bottom-left menu, click **Settings**.
- In the left sidebar, click **Model Providers**.
3. Disable selected providers:
- For each of the following providers on the right panel (Llama.cpp, OpenAI, Anthropic):
- Locate the provider row.
- On the right side of the row, find the enable/disable chip/toggle (enabled by default).
- If it is enabled, click the chip/toggle to disable it.
- Immediately call `wait()` once to allow the UI to update and the next screenshot to be captured for the model.
- If it is already disabled, leave it as is (no click). Still call `wait()` once to confirm state in the next screenshot.
4. Verify providers are hidden in the sidebar list:
- Call `wait()` once more to ensure the sidebar reflects the latest state.
- Check the left sidebar list under **Model Providers**.
- Confirm that the entries for **Llama.cpp**, **OpenAI**, and **Anthropic** are not visible in the sidebar after disabling.
5. Return result:
- If all three providers (Llama.cpp, OpenAI, Anthropic) are disabled and not visible in the sidebar list, return:
{"result": True}
- If any of the three providers is still visible in the sidebar list after being disabled, return:
{"result": False}
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
- On success, return:
{"result": True}
- On failure, return:
{"result": False}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,59 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are going to verify that downloading a variant works correctly inside the **Model card page**.
Steps:
1. If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
2. In the left sidebar, click **Hub**.
3. Click in the search bar and type exactly: `Menlo/Lucy-gguf`, then press **Enter**.
4. In the search results, click on the **model name Menlo_Lucy-GGUF** to open the model card page.
5. In the model card page, go to the list of variants and find **Menlo_Lucy-IQ3_XS**.
- Click directly on the **Download button** on the right side of that row.
6. After clicking, watch for a **progress bar** to appear (this means the download started).
7. Wait until the download finishes. Once done, the **Download** button should change to a **Use** button on that row.
- If it already shows **Use** before clicking (meaning it's already downloaded), consider the check **passed**.
Verification rule:
- Consider the check **passed** if the variant **Menlo_Lucy-IQ3_XS** shows a **Use** button (meaning the download finished).
- If it does not change to **Use** after downloading (or the download fails), the check **fails**.
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language.
- You MUST return ONLY the JSON format below, nothing else.
- Do NOT add any explanations, thoughts, or additional text.
If the targeted variant shows **Use** after you perform the steps (or it already shows **Use**), return: {"result": True}.
Otherwise, return: {"result": False}.
IMPORTANT:
- Your response must be ONLY the JSON above.
- Do NOT add any other text before or after the JSON.
"""

View File

@ -0,0 +1,58 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are going to verify that clicking **Download** for a variant in the **Show variants** list works, and that the button changes to **Use**.
Steps:
1. If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
2. In the left sidebar, click **Hub**.
3. Click in the search bar and type exactly: `Menlo/Lucy-gguf`, then press **Enter**.
4. In the **Menlo_Lucy-GGUF** card, make sure **Show variants** is turned on so the list of variants is visible.
5. In the list of variants, find **Menlo_Lucy-IQ3_M**.
- Click **directly in the center of the round gray button with the down-arrow icon** on the right side of that row.
- Do not click near the file size text or next to the icon — only in the middle of the icon button itself.
6. After clicking, watch for a **progress bar** to appear on that row (this means the download started).
7. Wait for the download to complete. Once finished, the **Download** icon should be replaced by a **Use** button on that same row.
- If it already shows **Use** before clicking (meaning it's already downloaded), consider the check **passed**.
Verification rule:
- Consider the check **passed** if the variant **Menlo_Lucy-IQ3_M** shows a **Use** button (meaning the download finished).
- If it does not change to **Use** after downloading (or the download fails), the check **fails**.
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language.
- You MUST return ONLY the JSON format below, nothing else.
- Do NOT add any explanations, thoughts, or additional text.
If the targeted variant shows **Use** after you perform the steps (or it already shows **Use**), return: {"result": True}.
Otherwise, return: {"result": False}.
IMPORTANT:
- Your response must be ONLY the JSON above.
- Do NOT add any other text before or after the JSON.
"""

View File

@ -0,0 +1,56 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in `Thought` part.
## User Instruction
You are going to verify that a **Hugging Face model can be added from its URL and downloaded successfully**.
Steps:
1. If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
2. In the bottom-left menu, click **Hub**.
3. In the Hub search bar, paste the model URL:
`https://huggingface.co/Menlo/Lucy-gguf/tree/main`
4. Press **Enter** to search.
5. In the results, click **Download** for the model.
6. Wait until the **Download** button changes to **Use**.
- This indicates the model has been fully downloaded.
Verification rule:
- Consider the check **passed** if the button changes to **Use** after the download completes.
- If the button does not change to **Use**, the check **fails**.
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language.
- You MUST return ONLY the JSON format below, nothing else.
- Do NOT add any explanations, thoughts, or additional text.
If the model downloads successfully and the button changes to "Use", return: {"result": True}.
Otherwise, return: {"result": False}.
IMPORTANT:
- Your response must be ONLY the JSON above.
- Do NOT add any other text before or after the JSON.
"""

View File

@ -0,0 +1,49 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are going to verify that **Hugging Face (HF) models are listed properly** in the app.
Steps:
1. If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
2. Open the **Hub** page (e.g., from the left sidebar or main navigation) by clicking **Hub**.
3. On the **Hub** page, verify that the list of models appears.
4. Verification rule:
- If you can see **at least one model item** in the Hub list (i.e., the list is not empty), consider the check **passed**.
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language.
- You MUST return ONLY the JSON format below, nothing else.
- Do NOT add any explanations, thoughts, or additional text.
If models are visible in the Hub (list not empty), return: {"result": True}.
Otherwise, return: {"result": False}.
IMPORTANT:
- Your response must be ONLY the JSON above.
- Do NOT add any other text before or after the JSON.
"""

View File

@ -0,0 +1,60 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are going to verify that **Llama.cpp shows the recommended version & backend description** under Model Providers in Settings.
Steps:
1. If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
2. In the bottom-left menu, click **Settings**.
3. In the left sidebar, click **Model Providers**.
4. In the left sidebar menu, under **Model Providers**, click on **Llama.cpp**.
- Make sure to click the one in the **sidebar**, not the entry in the main panel.
- Click directly in the center of the "Llama.cpp" text label in the sidebar to open its configuration page.
5. In the **Version & Backend** section, check the description.
Verification rule:
- Consider the check **passed** if the description under Version & Backend contains:
- A version string starting with a build prefix of the form **b<number>/** (e.g., `b6097/win-avx2-cuda-cu12.0-x64`, `b6097/win-avx2-x64`, or `b6097/win-vulkan-x64`),
- Followed by the text **"Version and backend is the recommended backend"**.
- The exact version (e.g., b6097, b5857, b5833, etc.) may vary — any valid build number is acceptable as long as the description includes the phrase above.
- If this text is missing or different, the check **fails**.
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language.
- You MUST return ONLY the JSON format below, nothing else.
- Do NOT add any explanations, thoughts, or additional text.
If the description is exactly as expected, return: {"result": True}.
Otherwise, return: {"result": False}.
IMPORTANT:
- Your response must be ONLY the JSON above.
- Do NOT add any other text before or after the JSON.
"""

View File

@ -14,4 +14,4 @@ Step-by-step instructions:
If the model responds correctly, return: {"result": True}, otherwise return: {"result": False}.
In all your responses, use only plain ASCII characters. Do NOT use Unicode symbols
"""
"""

View File

@ -0,0 +1,65 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait()
finished()
call_user()
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
- If a dialog appears (e.g., "Help Us Improve Jan" or "New Version/Update"), dismiss it before proceeding.
- The **Add Assistant** dialog has its own vertical scrollbar; if controls are not visible, click inside the dialog to focus it, then scroll or drag the dialog's scrollbar handle.
## User Instruction
You are going to verify the App data folder path shown in Jan's Settings.
Navigation: **Settings > General**
Step-by-step instructions:
0. Given the Jan application is already opened.
1. If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
2. In the bottom-left menu, click on **Settings**.
3. In the left sidebar, click on **General**.
4. In the **Data folder** section of **General**, locate **App data** and the line **"Default location for messages and other user data:"**. Read the displayed path.
5. Verify the displayed path matches **one** of the expected OS-specific defaults below (accept either the standard or nightly variant):
- **Windows (standard):** `C:\\Users\\<Username>\\AppData\\Roaming\\Jan\\data`
- **Windows (nightly):** `C:\\Users\\<Username>\\AppData\\Roaming\\Jan-nightly\\data`
- **macOS (standard):** `/Users/<Username>/Library/Application Support/Jan/data`
- **macOS (nightly):** `/Users/<Username>/Library/Application Support/Jan-nightly/data`
- **Linux (standard):** `/home/<Username>/.local/share/Jan/data`
- **Linux (nightly):** `/home/<Username>/.local/share/Jan-nightly/data`
Notes for verification (guidance only, do not display to user):
- Windows paths typically start with a drive letter and include `\\AppData\\Roaming\\Jan\\data` or `\\AppData\\Roaming\\Jan-nightly\\data`.
- macOS paths start with `/Users/` and include `/Library/Application Support/Jan/data` or `/Library/Application Support/Jan-nightly/data`.
- Linux paths start with `/home/` and include `/.local/share/Jan/data` or `/.local/share/Jan-nightly/data`.
- If the **Data folder** section is not visible, scroll down within **General** until it appears.
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
If the displayed App data path matches one of the expected OS-specific defaults above, return: {"result": True}. Otherwise, return: {"result": False}.
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,51 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are going to verify that the **Open Logs** button works correctly in Settings.
Steps:
1. If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
2. In the bottom-left menu, click **Settings**.
3. In the left sidebar, click on **General**.
4. Scroll down until you see the **Data folder** section.
5. In the **Data folder** section, click the **Open Logs** button.
6. Verification rule:
- If an **App Logs - Jan** window is displayed **and it contains some logs**, consider the check **passed**.
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language.
- You MUST return ONLY the JSON format below, nothing else.
- Do NOT add any explanations, thoughts, or additional text.
If the App Logs window appears, return: {"result": True}.
Otherwise, return: {"result": False}.
IMPORTANT:
- Your response must be ONLY the JSON above.
- Do NOT add any other text before or after the JSON.
"""

View File

@ -0,0 +1,55 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait()
finished()
call_user()
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
- If a dialog appears (e.g., "Help Us Improve Jan" or "New Version/Update"), dismiss it before proceeding.
- If **New Version Available** popup appears on app launch (older versions), click **Remind Me Later** to dismiss it before continuing.
## User Instruction
You are going to test the **Check for Updates** function in Jan's Settings.
Navigation: **Settings > General**
Step-by-step instructions:
0. Given the Jan application is already opened.
1. If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
2. If a **New Version Available** popup (e.g., "New Version 0.6.8 Update Available") appears immediately on startup, click **Remind Me Later** to dismiss it before continuing.
3. In the bottom-left menu, click on **Settings**.
4. In the left sidebar, click on **General**.
5. In the **General** section, locate **Check for Updates** and click the button.
6. Verify the behavior:
- If Jan is already up to date, a message such as **"You're running the latest version"** should appear.
- If a new version is available, a popup should appear in the bottom-right corner with text like **"New Version X.Y.Z Update Available"** (e.g., "New Version 0.6.8 Update Available"), confirming the update check works as expected.
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
If the **Check for Updates** button correctly shows either "You're running the latest version" or the new version popup (e.g., "New Version 0.6.8 Update Available"), return: {"result": True}. Otherwise, return: {"result": False}.
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,55 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are going to test the Jan application by verifying that enabling Experimental Features reveals the MCP Servers section in Settings.
Step-by-step instructions:
0. Given the Jan application is already open.
1. If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing. This ensures full visibility of the interface.
2. In the bottom-left menu, click **Settings**.
3. In the left sidebar, make sure **General** is selected.
4. Scroll down to the **Advanced** section.
5. Locate the toggle labeled **Experimental Features** and switch it ON.
6. Observe the **Settings** sidebar.
7. Verify that a new section called **MCP Servers** appears.
8. Click on **MCP Servers** in the sidebar to ensure it opens and displays its content correctly.
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
If the MCP Servers section appears after enabling Experimental Features and you can open it successfully, return:
{"result": true}
Otherwise, return:
{"result": false}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,56 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are going to test the Jan application by verifying the available extensions listed under Settings → Extensions.
Step-by-step instructions:
0. Given the Jan application is already open.
1. If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing. This ensures full visibility of the interface.
2. In the bottom-left corner, click **Settings**.
3. In the left sidebar of Settings, click on **Extensions**.
4. In the main panel, confirm that the following four extensions are listed:
- Jan Assistant
- Conversational
- Download Manager
- llama.cpp Inference Engine
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
If all four extensions are present, return:
{"result": true}
Otherwise, return:
{"result": false}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,94 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are going to test the Jan application by verifying that the hardware information is displayed correctly in the Settings panel.
Step-by-step instructions:
0. Given the Jan application is already opened.
1. If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing. This ensures full visibility of the interface.
2. In the bottom-left menu, click on **Settings**.
3. In the left sidebar, click on **Hardware**.
4. In the main panel, ensure the following sections are displayed clearly with appropriate system information:
---
**Operating System**
- This section should display:
- A name such as "Windows", "Ubuntu", or "Macos"
- A version string like "Windows 11 Pro", "22.04.5 LTS", or "macOS 15.5 Sequoia"
---
**CPU**
- This section should display:
- A processor model (e.g., Intel, AMD, or Apple Silicon)
- An architecture (e.g., x86_64, amd64, or aarch64)
- A number of cores
- Optional: An instruction set list (may appear on Linux or Windows)
- A usage bar indicating current CPU load
---
**Memory**
- This section should display:
- Total RAM
- Available RAM
- A usage bar showing memory consumption
---
**GPUs**
- This section is located at the bottom of the Hardware page.
- If the system has a GPU:
- It should display the GPU name (e.g., NVIDIA GeForce GTX 1080)
- A toggle should be available to enable or disable GPU usage
- If no GPU is detected:
- It should display a message like “No GPUs detected”
---
**Final Check**
- Ensure that there are **no error messages** in the UI.
- The layout should appear clean and correctly rendered with no broken visual elements.
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
If all sections display relevant hardware information accurately and the interface is error-free, return:
{"result": true}
Otherwise, return:
{"result": false}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,52 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in `Thought` part.
## User Instruction
You are going to verify that the **Local API Server** can start and logs are displayed correctly.
Steps:
1. If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
2. In the bottom-left menu, click **Settings**.
3. In the left sidebar, click on **Local API Server**.
4. Under **Server Configuration**, type `"1234"` into the **API Key** field **if the field is empty**, otherwise ignore this step.
5. Click the **Start Server** button in the Local API Server section.
6. In the **Server Logs** area, click **Open Logs**.
7. Verification rule:
- If the logs window shows entries such as `INFO Jan API server started on http://127.0.0.1:1337` (or similar log lines confirming the server started), consider the check **passed**.
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language.
- You MUST return ONLY the JSON format below, nothing else.
- Do NOT add any explanations, thoughts, or additional text.
If the server logs window shows that the server started (e.g., contains an INFO startup log), return: {"result": True}.
Otherwise, return: {"result": False}.
IMPORTANT:
- Your response must be ONLY the JSON above.
- Do NOT add any other text before or after the JSON.
"""

View File

@ -0,0 +1,58 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are going to test the Jan application by verifying that all expected model providers are listed in the Settings panel.
Step-by-step instructions:
0. Given the Jan application is already opened.
1. If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing. This ensures full visibility of the interface.
2. In the bottom-left menu, click on **Settings**.
3. In the left sidebar of Settings, click on **Model Providers**.
4. In the main panel, verify that the following model providers are listed:
- Llama.cpp
- OpenAI
- Anthropic
- Cohere
- OpenRouter
- Mistral
- Groq
- Gemini
- Hugging Face
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
If all the providers are visible, return: {"result": True}. Otherwise, return: {"result": False}.
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,58 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait()
finished()
call_user()
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are going to verify that the Shortcuts list is correctly shown.
Steps:
1. If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it.
2. If a dialog appears in the bottom-right corner about a **New Version / Update Available**, click **Remind Me Later** to dismiss it.
3. Open **Settings** from the bottom-left menu.
4. Click **Shortcuts** in the left sidebar.
5. In the main panel, verify the following shortcuts are visible and correctly listed (order may vary):
- New Chat — Ctrl N
- Toggle Sidebar — Ctrl B
- Zoom In — Ctrl +
- Zoom Out — Ctrl -
- Send Message — Enter
- New Line — Shift Enter
- Go to Settings — Ctrl ,
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- Respond in English only.
- Return ONLY one of the following JSON objects, with no extra text.
If all shortcuts are present and correct, return:
{"result": true}
Otherwise, return:
{"result": false}
IMPORTANT:
- Your response must be ONLY the JSON above.
- Do NOT add any other text before or after the JSON.
"""

View File

@ -0,0 +1,62 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
- You need to clear the search bar after searching the models
## User Instruction
You are going to test the Jan application by downloading and verifying models.
Step-by-step instructions:
1. Given the Jan application is already opened.
2. In the **bottom-left corner**, click the **Hub** menu item.
3. Locate the **qwen3-0.6B** model in the Hub list:
- If the button says **Download**, click it to download the model.
- If the button says **Use**, the model is already installed.
- Clear the search bar once the download is done
4. Locate the **lucy-128k-gguf** model in the Hub list:
- If the button says **Download**, click it to download the model.
- If the button says **Use**, the model is already installed.
- Clear the search bar once the download is done
5. Wait for both models to finish downloading and become ready (if downloading was required).
6. Once available, toggle the **Downloaded** switch (white chip on the gray rounded button located to the left of the word "downloaded" on the top right of the app) to view only downloaded models.
7. Verify that both **qwen3-0.6B** and **lucy-128k-gguf** appear in the downloaded models list.
8. Navigate to **Settings > Model Providers**.
9. In the left sidebar, click on **Llama.cpp**.
10. Verify that both **qwen3-0.6B** and **lucy-128k-gguf** are listed under the Models section.
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language.
- You MUST return ONLY the JSON format below, nothing else.
- Do NOT add any explanations, thoughts, or additional text.
If both models are successfully downloaded, appear under the Downloaded toggle, and are displayed under Settings > Model Providers > Llama.cpp, return: {"result": True}. Otherwise, return: {"result": False}.
IMPORTANT:
- Your response must be ONLY the JSON above.
- Do NOT add any other text before or after the JSON.
"""

View File

@ -0,0 +1,69 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```
Thought: ...
Action: ...
```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use "\
" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are changing the Appearance theme to Dark in the OLD version of the Jan application.
PHASE: SETUP DARK THEME APPEARANCE (OLD VERSION)
Step-by-step instructions:
1. Open the Jan application (OLD version).
- If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
- If **New Version Available** popup appears on app launch (older versions), click **Remind Me Later** to dismiss it before continuing.
2. Navigate to Appearance settings:
- In the bottom-left menu, click **Settings**.
- In the left sidebar, click **Appearance**.
3. Change the theme to Dark:
- In the **Theme** field, click the current theme menu (e.g., **System**, **Light**, or **Dark**).
- Select **Dark** (only change if it is not already Dark).
4. Return result:
- If the Theme is set to **Dark**, return:
{"result": True, "phase": "setup_complete"}
- If the Theme cannot be set to **Dark**, return:
{"result": False, "phase": "setup_failed"}
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
- If the configuration is successful, return:
{"result": True, "phase": "setup_complete"}
- If there are any issues, return:
{"result": False, "phase": "setup_failed"}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,71 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```
Thought: ...
Action: ...
```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use "\
" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are changing the Font Size setting to "Extra Large" in the OLD version of the Jan application.
PHASE: SETUP FONT SIZE APPEARANCE (OLD VERSION)
Step-by-step instructions:
1. Open the Jan application (OLD version).
- If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
- If **New Version Available** popup appears on app launch (older versions), click **Remind Me Later** to dismiss it before continuing.
2. Navigate to Appearance settings:
- In the bottom-left menu, click **Settings**.
- In the left sidebar, click **Appearance**.
3. Change the font size to "Extra Large":
- Locate the **Font Size** section with description "Adjust the app's font size."
- Click on the current font size button (likely showing "Medium").
- In the dropdown menu that appears, select **"Extra Large"**.
- Verify that the font size button now displays "Extra Large".
4. Return result:
- If the Font Size is set to **"Extra Large"**, return:
{"result": True, "phase": "setup_complete"}
- If the Font Size cannot be set to **"Extra Large"**, return:
{"result": False, "phase": "setup_failed"}
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
- If the configuration is successful, return:
{"result": True, "phase": "setup_complete"}
- If there are any issues, return:
{"result": False, "phase": "setup_failed"}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,69 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```
Thought: ...
Action: ...
```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use "\
" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are changing the Appearance theme to Light in the OLD version of the Jan application.
PHASE: SETUP LIGHT THEME APPEARANCE (OLD VERSION)
Step-by-step instructions:
1. Open the Jan application (OLD version).
- If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
- If **New Version Available** popup appears on app launch (older versions), click **Remind Me Later** to dismiss it before continuing.
2. Navigate to Appearance settings:
- In the bottom-left menu, click **Settings**.
- In the left sidebar, click **Appearance**.
3. Change the theme to Light:
- In the **Theme** field, click the current theme menu (e.g., **System**, **Light**, or **Dark**).
- Select **Light** (only change if it is not already Light).
4. Return result:
- If the Theme is set to **Light**, return:
{"result": True, "phase": "setup_complete"}
- If the Theme cannot be set to **Light**, return:
{"result": False, "phase": "setup_failed"}
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
- If the configuration is successful, return:
{"result": True, "phase": "setup_complete"}
- If there are any issues, return:
{"result": False, "phase": "setup_failed"}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,69 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```
Thought: ...
Action: ...
```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use "\
" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are verifying that the Dark theme Appearance setting persists after upgrading to the NEW version of the Jan application.
PHASE: VERIFY DARK THEME APPEARANCE PERSISTENCE (NEW VERSION)
Step-by-step instructions:
1. Open the Jan application (NEW version).
- If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
- If **New Version Available** popup appears on app launch (older versions), click **Remind Me Later** to dismiss it before continuing.
2. Navigate to Appearance settings:
- In the bottom-left menu, click **Settings**.
- In the left sidebar, click **Appearance**.
3. Verify Dark theme persistence:
- Check the **Theme** field menu and read the current theme value.
- Confirm the theme is **Dark**.
4. Return result:
- If the current theme is **Dark**, return:
{"result": True, "phase": "verification_complete"}
- Otherwise, return:
{"result": False, "phase": "verification_failed"}
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
- If the settings persist and are correct, return:
{"result": True, "phase": "verification_complete"}
- If there are any issues, return:
{"result": False, "phase": "verification_failed"}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,70 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```
Thought: ...
Action: ...
```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use "\
" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are verifying that the Font Size setting "Extra Large" persists after upgrading to the NEW version of the Jan application.
PHASE: VERIFY FONT SIZE APPEARANCE PERSISTENCE (NEW VERSION)
Step-by-step instructions:
1. Open the Jan application (NEW version).
- If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
- If **New Version Available** popup appears on app launch (older versions), click **Remind Me Later** to dismiss it before continuing.
2. Navigate to Appearance settings:
- In the bottom-left menu, click **Settings**.
- In the left sidebar, click **Appearance**.
3. Verify Font Size persistence:
- Locate the **Font Size** section with description "Adjust the app's font size."
- Check the font size button and read the current font size value.
- Confirm the font size is **"Extra Large"**.
4. Return result:
- If the current font size is **"Extra Large"**, return:
{"result": True, "phase": "verification_complete"}
- Otherwise, return:
{"result": False, "phase": "verification_failed"}
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
- If the settings persist and are correct, return:
{"result": True, "phase": "verification_complete"}
- If there are any issues, return:
{"result": False, "phase": "verification_failed"}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,69 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```
Thought: ...
Action: ...
```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use "\
" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are verifying that the Light theme Appearance setting persists after upgrading to the NEW version of the Jan application.
PHASE: VERIFY LIGHT THEME APPEARANCE PERSISTENCE (NEW VERSION)
Step-by-step instructions:
1. Open the Jan application (NEW version).
- If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
- If **New Version Available** popup appears on app launch (older versions), click **Remind Me Later** to dismiss it before continuing.
2. Navigate to Appearance settings:
- In the bottom-left menu, click **Settings**.
- In the left sidebar, click **Appearance**.
3. Verify Light theme persistence:
- Check the **Theme** field menu and read the current theme value.
- Confirm the theme is **Light**.
4. Return result:
- If the current theme is **Light**, return:
{"result": True, "phase": "verification_complete"}
- Otherwise, return:
{"result": False, "phase": "verification_failed"}
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
- If the settings persist and are correct, return:
{"result": True, "phase": "verification_complete"}
- If there are any issues, return:
{"result": False, "phase": "verification_failed"}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,75 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are setting up a chat thread using a custom assistant in the OLD version of the Jan application.
PHASE: SETUP CHAT THREAD (OLD VERSION)
Step-by-step instructions:
1. Open the Jan application (OLD version).
2. Download the model:
- In the bottom-left corner, click **Hub**.
- Find and download the model named: `jan-nano-gguf`.
- Wait for the download to complete (the button changes to **Use**).
- Click the **Use** button to return to the Chat UI.
3. Start a new chat using a custom assistant:
- In the main chat panel, click the assistant icon at the top (default is `Jan`).
- Select the custom assistant: `Python Tutor`.
4. Select the model:
- Click the **Select a model** button below the chat input.
- Choose: `jan-nano-gguf` under the `Llama.Cpp` section.
5. Send a test message:
- Type: `Hello world` and press Enter or click send message (button with right arrow). You should click at the center of the button.
- Wait up to 12 minutes for the model to load and respond.
6. Verify the model responds:
- If the model replies appropriately, and the thread is created successfully in the left sidebar under **No threads yet**, return:
{"result": True, "phase": "setup_complete"}
- If no response is received or the chat thread is not saved:
{"result": False, "phase": "setup_failed"}
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
- If the model replies appropriately, and the thread is created successfully in the left sidebar, return:
{"result": True, "phase": "setup_complete"}
- If no response is received:
{"result": False, "phase": "setup_failed"}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,77 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are setting up custom assistants in the OLD version of the Jan application.
PHASE: SETUP CUSTOM ASSISTANTS (OLD VERSION)
Step-by-step instructions:
1. Open the Jan application (OLD version).
2. Navigate to Assistants:
- In the bottom-left menu, click **Assistants**.
3. Create first custom assistant:
- Click **Create Assistant** button.
- Enter name: `Python Tutor`
- Enter description: `A helpful Python programming tutor that explains concepts clearly and provides code examples.`
- Enter instructions: `You are a Python programming tutor. Help users learn Python by explaining concepts clearly, providing code examples, and answering questions about Python programming.`
- Click **Create** button.
4. Create second custom assistant:
- Click **Create Assistant** button again.
- Enter name: `Creative Writer`
- Enter description: `A creative writing assistant that helps with storytelling, poetry, and creative content.`
- Enter instructions: `You are a creative writing assistant. Help users develop stories, write poetry, and create engaging creative content. Provide constructive feedback and creative suggestions.`
- Click **Create** button.
5. Verify assistants were created:
- Check that both `Python Tutor` and `Creative Writer` appear in the assistants list.
- Verify their names, descriptions, and instructions are correct.
6. Return result:
- If both assistants are created successfully with correct details, return:
{"result": True, "phase": "setup_complete"}
- If there are any issues with creation or verification, return:
{"result": False, "phase": "setup_failed"}
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
- If both assistants are created successfully, return:
{"result": True, "phase": "setup_complete"}
- If there are any issues, return:
{"result": False, "phase": "setup_failed"}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,74 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are verifying that chat threads with custom assistants persist after upgrading to the NEW version of the Jan application.
PHASE: VERIFY CHAT THREAD PERSISTENCE (NEW VERSION)
Step-by-step instructions:
1. Open the Jan application (NEW version).
2. Check for existing chat threads:
- Look in the left sidebar for any existing chat threads.
- If you see threads, click on one to open it.
3. Verify the custom assistant is still available:
- In the main chat panel, click the assistant icon at the top.
- Check if the custom assistant `Python Tutor` is still available in the list.
4. Verify the model is still available:
- Click the **Select a model** button below the chat input.
- Check if `jan-nano-gguf` is still available under the `Llama.Cpp` section.
5. Test chat functionality:
- Select the `Python Tutor` assistant.
- Select the `jan-nano-gguf` model.
- Send a test message: `Hello, can you help me with Python?`
- Wait for a response.
6. Return result:
- If the chat thread, assistant, and model all persist and work correctly, return:
{"result": True, "phase": "verification_complete"}
- If any of these elements are missing or not working, return:
{"result": False, "phase": "verification_failed"}
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
- If everything persists and works correctly, return:
{"result": True, "phase": "verification_complete"}
- If there are any issues, return:
{"result": False, "phase": "verification_failed"}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,73 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are verifying that custom assistants persist after upgrading to the NEW version of the Jan application.
PHASE: VERIFY ASSISTANT PERSISTENCE (NEW VERSION)
Step-by-step instructions:
1. Open the Jan application (NEW version).
2. Navigate to Assistants:
- In the bottom-left menu, click **Assistants**.
3. Verify first custom assistant:
- Look for the `Python Tutor` assistant in the list.
- Click on it to view details.
- Verify the name is exactly: `Python Tutor`
- Verify the description contains: `A helpful Python programming tutor that explains concepts clearly and provides code examples.`
- Verify the instructions contain: `You are a Python programming tutor. Help users learn Python by explaining concepts clearly, providing code examples, and answering questions about Python programming.`
4. Verify second custom assistant:
- Look for the `Creative Writer` assistant in the list.
- Click on it to view details.
- Verify the name is exactly: `Creative Writer`
- Verify the description contains: `A creative writing assistant that helps with storytelling, poetry, and creative content.`
- Verify the instructions contain: `You are a creative writing assistant. Help users develop stories, write poetry, and create engaging creative content. Provide constructive feedback and creative suggestions.`
5. Return result:
- If both assistants persist with correct names, descriptions, and instructions, return:
{"result": True, "phase": "verification_complete"}
- If any assistant is missing or has incorrect details, return:
{"result": False, "phase": "verification_failed"}
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
- If both assistants persist correctly, return:
{"result": True, "phase": "verification_complete"}
- If there are any issues, return:
{"result": False, "phase": "verification_failed"}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,83 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are setting up models in the OLD version of the Jan application.
PHASE: SETUP MODELS (OLD VERSION)
Step-by-step instructions:
1. Open the Jan application (OLD version).
2. Navigate to Hub:
- In the bottom-left corner, click **Hub**.
3. Download first model:
- Find the model named: `jan-nano-gguf`
- Click the **Download** button.
- Wait for the download to complete (the button changes to **Use**).
- Click the **Use** button to return to the Chat UI.
4. Download second model:
- Go back to **Hub**.
- Find the model named: `gemma-2-2b-instruct-gguf`
- Click the **Download** button.
- Wait for the download to complete (the button changes to **Use**).
- Click the **Use** button to return to the Chat UI.
5. Verify models are available:
- In the Chat UI, click the **Select a model** button below the chat input.
- Check that both models appear under the `Llama.Cpp` section:
- `jan-nano-gguf`
- `gemma-2-2b-instruct-gguf`
6. Test model functionality:
- Select `jan-nano-gguf` as the model.
- Type a test message: `Hello, can you respond?`
- Press Enter and wait for a response.
7. Return result:
- If both models are downloaded and functional, return:
{"result": True, "phase": "setup_complete"}
- If there are any issues with downloads or functionality, return:
{"result": False, "phase": "setup_failed"}
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
- If both models are downloaded and working, return:
{"result": True, "phase": "setup_complete"}
- If there are any issues, return:
{"result": False, "phase": "setup_failed"}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,81 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```
Thought: ...
Action: ...
```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use "\
" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are configuring Model Providers in the OLD version of the Jan application to prepare for migration verification.
PHASE: SETUP MODEL PROVIDERS (OLD VERSION)
Desired state:
- Disabled: Llama.cpp, OpenAI, Anthropic
- Enabled: Cohere, OpenRouter, Mistral, Groq, Gemini, Hugging Face
Step-by-step instructions:
1. Open the Jan application (OLD version).
- If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
- If **New Version Available** popup appears on app launch (older versions), click **Remind Me Later** to dismiss it before continuing.
2. Navigate to Model Providers settings:
- In the bottom-left menu, click **Settings**.
- In the left sidebar, click **Model Providers**.
3. Set provider states:
- For each provider in the right panel, use the chip/toggle on the right side of the row to set the desired state:
- Llama.cpp: disable (toggle off if enabled), then call `wait()` once
- OpenAI: disable (toggle off if enabled), then call `wait()` once
- Anthropic: disable (toggle off if enabled), then call `wait()` once
- Cohere: enable (toggle on if disabled), then call `wait()` once
- OpenRouter: enable (toggle on if disabled), then call `wait()` once
- Mistral: enable (toggle on if disabled), then call `wait()` once
- Groq: enable (toggle on if disabled), then call `wait()` once
- Gemini: enable (toggle on if disabled), then call `wait()` once
- Hugging Face: enable (toggle on if disabled), then call `wait()` once
4. Return result:
- If all providers are set to the desired states (Disabled: Llama.cpp, OpenAI, Anthropic; Enabled: Cohere, OpenRouter, Mistral, Groq, Gemini, Hugging Face), return:
{"result": True, "phase": "setup_complete"}
- If any provider cannot be set to the desired state, return:
{"result": False, "phase": "setup_failed"}
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
- If the configuration is successful, return:
{"result": True, "phase": "setup_complete"}
- If there are any issues, return:
{"result": False, "phase": "setup_failed"}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,84 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are verifying that downloaded models persist after upgrading to the NEW version of the Jan application.
PHASE: VERIFY MODEL PERSISTENCE (NEW VERSION)
Step-by-step instructions:
1. Open the Jan application (NEW version).
2. Check Hub for downloaded models:
- In the bottom-left corner, click **Hub**.
- Look for the **Downloaded** filter toggle on the right side.
- Click the **Downloaded** filter to show only downloaded models.
3. Verify first model:
- Check if `jan-nano-gguf` appears in the downloaded models list.
- Verify it shows the **Use** button (not **Download**).
4. Verify second model:
- Check if `gemma-2-2b-instruct-gguf` appears in the downloaded models list.
- Verify it shows the **Use** button (not **Download**).
5. Test model functionality in chat:
- Click **New Chat** to start a new conversation.
- Click the **Select a model** button below the chat input.
- Check if both models appear under the `Llama.Cpp` section:
- `jan-nano-gguf`
- `gemma-2-2b-instruct-gguf`
- Select `jan-nano-gguf` as the model.
- Send a test message: `Hello, are you still working after the upgrade?`
- Wait for a response.
6. Check model provider settings:
- Go to **Settings** > **Model Providers**.
- Click on **Llama.cpp** section.
- Verify both models are listed in the Models section.
7. Return result:
- If both models persist and are functional, return:
{"result": True, "phase": "verification_complete"}
- If any models are missing or not working, return:
{"result": False, "phase": "verification_failed"}
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
- If both models persist and work correctly, return:
{"result": True, "phase": "verification_complete"}
- If there are any issues, return:
{"result": False, "phase": "verification_failed"}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,81 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```
Thought: ...
Action: ...
```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use "\
" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are verifying that the Model Providers enabled/disabled state persists after upgrading to the NEW version of the Jan application.
PHASE: VERIFY MODEL PROVIDERS PERSISTENCE (NEW VERSION)
Expected persisted state:
- Disabled: Llama.cpp, OpenAI, Anthropic
- Enabled: Cohere, OpenRouter, Mistral, Groq, Gemini, Hugging Face
Step-by-step instructions:
1. Open the Jan application (NEW version).
- If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
- If **New Version Available** popup appears on app launch (older versions), click **Remind Me Later** to dismiss it before continuing.
2. Navigate to Model Providers settings:
- In the bottom-left menu, click **Settings**.
- In the left sidebar, click **Model Providers**.
3. Verify provider states:
- For each provider in the right panel, read the chip/toggle state and confirm it matches the expected persisted state:
- Llama.cpp: disabled
- OpenAI: disabled
- Anthropic: disabled
- Cohere: enabled
- OpenRouter: enabled
- Mistral: enabled
- Groq: enabled
- Gemini: enabled
- Hugging Face: enabled
4. Return result:
- If all providers match the expected states, return:
{"result": True, "phase": "verification_complete"}
- If any provider does not match the expected state, return:
{"result": False, "phase": "verification_failed"}
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
- If the settings persist and are correct, return:
{"result": True, "phase": "verification_complete"}
- If there are any issues, return:
{"result": False, "phase": "verification_failed"}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,73 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```
Thought: ...
Action: ...
```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use "\
" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are configuring HTTPS Proxy settings in the OLD version of the Jan application.
PHASE: SETUP HTTPS PROXY (OLD VERSION)
Step-by-step instructions:
1. Open the Jan application (OLD version).
- If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
- If **New Version Available** popup appears on app launch (older versions), click **Remind Me Later** to dismiss it before continuing.
2. Navigate to HTTPS Proxy settings:
- In the bottom-left menu, click **Settings**.
- In the left sidebar, click **HTTPS Proxy**.
3. Configure HTTPS Proxy:
- Proxy URL: In the field with placeholder `http://proxy.example.com:8080`, enter `http://test-proxy.com:8081` (only change it if it's different).
- Authentication:
- Username: enter `test123` (only if different or empty).
- Password: enter `123` (only if different or empty; field may be masked).
- Ignore SSL Certificates:
- Click the middle of the toggle/chip to enable Ignore SSL Certificates (only if not already enabled).
4. Return result:
- If the Proxy URL is set to `http://test-proxy.com:8081` AND Username is set to `test123` AND Password is provided AND Ignore SSL Certificates is enabled, return:
{"result": True, "phase": "setup_complete"}
- If there are any issues with configuration, return:
{"result": False, "phase": "setup_failed"}
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
- If the configuration is successful, return:
{"result": True, "phase": "setup_complete"}
- If there are any issues, return:
{"result": False, "phase": "setup_failed"}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,76 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are setting up local API server settings in the OLD version of the Jan application.
PHASE: SETUP LOCAL API SERVER (OLD VERSION)
Step-by-step instructions:
1. Open the Jan application (OLD version).
- If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
- If **New Version Available** popup appears on app launch (older versions), click **Remind Me Later** to dismiss it before continuing.
2. Navigate to Local API Server settings:
- In the bottom-left menu, click **Settings**.
- In the left sidebar, click **Local API Server**.
3. Configure the server settings:
- Under **Server Configuration**, find the **Server Port** field.
- If the port is not already `1338`, change it to `1338`.
- Find the **API Prefix** field.
- If the prefix is not already `/v2`, change it to `/v2`.
- Find the **API Key** field (look for placeholder "Enter API Key").
- If the API Key field appears empty, enter `1234` as the API key value (this field is required for server startup).
- Verify the Server Port and API Prefix values are set to their target values.
4. Test server functionality:
- Click the **Start Server** button.
- In the **Server Logs** area, click **Open Logs**.
- Verify the server starts successfully and shows startup logs.
5. Return result:
- If the Server Port is set to `1338` AND the API Prefix is set to `/v2` AND the server starts successfully, return:
{"result": True, "phase": "setup_complete"}
- If there are any issues with configuration or server startup, return:
{"result": False, "phase": "setup_failed"}
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
- If the configuration is successful and server works, return:
{"result": True, "phase": "setup_complete"}
- If there are any issues, return:
{"result": False, "phase": "setup_failed"}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,72 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```
Thought: ...
Action: ...
```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use "\
" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are verifying that HTTPS Proxy settings persist after upgrading to the NEW version of the Jan application.
PHASE: VERIFY HTTPS PROXY PERSISTENCE (NEW VERSION)
Step-by-step instructions:
1. Open the Jan application (NEW version).
- If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
- If **New Version Available** popup appears on app launch (older versions), click **Remind Me Later** to dismiss it before continuing.
2. Navigate to HTTPS Proxy settings:
- In the bottom-left menu, click **Settings**.
- In the left sidebar, click **HTTPS Proxy**.
3. Verify HTTPS Proxy settings:
- Proxy URL: Verify it contains `http://test-proxy.com:8081`.
- Authentication:
- Username: Verify it contains `test123`.
- Password: If the Password field is masked/hidden, skip explicit value verification.
- Ignore SSL Certificates: Verify that the toggle/chip is enabled.
4. Return result:
- If the Proxy URL is `http://test-proxy.com:8081` AND Username is `test123` AND Ignore SSL Certificates is enabled, return:
{"result": True, "phase": "verification_complete"}
- If any of the settings have reverted or are missing, return:
{"result": False, "phase": "verification_failed"}
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
- If the settings persist and are correct, return:
{"result": True, "phase": "verification_complete"}
- If there are any issues, return:
{"result": False, "phase": "verification_failed"}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

View File

@ -0,0 +1,76 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
You are verifying that local API server settings persist after upgrading to the NEW version of the Jan application.
PHASE: VERIFY LOCAL API SERVER PERSISTENCE (NEW VERSION)
Step-by-step instructions:
1. Open the Jan application (NEW version).
- If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing.
- If **New Version Available** popup appears on app launch (older versions), click **Remind Me Later** to dismiss it before continuing.
2. Navigate to Local API Server settings:
- In the bottom-left menu, click **Settings**.
- In the left sidebar, click **Local API Server**.
3. Verify the server settings persist:
- Under **Server Configuration**, check the **Server Port** field.
- Verify it still contains the value `1338` that was set in the old version.
- Check the **API Prefix** field.
- Verify it still contains the value `/v2` that was set in the old version.
- Check the **API Key** field (look for placeholder "Enter API Key").
- If the API Key field appears empty, enter `1234` as the API key value (this field is required for server startup).
- If the Server Port is not `1338` OR the API Prefix is not `/v2`, the verification fails.
4. Test server functionality:
- Click the **Start Server** button.
- In the **Server Logs** area, click **Open Logs**.
- Verify the server starts successfully and shows startup logs.
5. Return result:
- If the Server Port remains `1338` AND the API Prefix remains `/v2` AND the server starts successfully, return:
{"result": True, "phase": "verification_complete"}
- If any of the settings have reverted OR the server fails to start, return:
{"result": False, "phase": "verification_failed"}
CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
- You MUST respond in English only, not any other language
- You MUST return ONLY the JSON format below, nothing else
- Do NOT add any explanations, thoughts, or additional text
- If the settings persist and server works correctly, return:
{"result": True, "phase": "verification_complete"}
- If there are any issues, return:
{"result": False, "phase": "verification_failed"}
IMPORTANT:
- Your response must be ONLY the JSON above
- Do NOT add any other text before or after the JSON
"""

autoqa/tests/template.txt Normal file
View File

@ -0,0 +1,31 @@
prompt = """
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
```\nThought: ...
Action: ...\n```
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
hotkey(key='')
type(content='') #If you want to submit your input, use \"\
\" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
## Note
- Use Chinese in `Thought` part.
- Summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
"""

View File

@ -286,10 +286,11 @@ def start_jan_app(jan_app_path=None):
logger.error(f"Error starting Jan application: {e}")
raise
def scan_test_files(tests_dir="tests"):
def scan_test_files(tests_dir="tests/base"):
"""
Scan tests folder and find all .txt files
Returns list with format [{'path': 'relative_path', 'prompt': 'file_content'}]
Note: Default changed to tests/base for current version testing
"""
test_files = []
tests_path = Path(tests_dir)
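The hunk above cuts off after `tests_path = Path(tests_dir)`. For context, here is a minimal sketch of how the rest of the function plausibly behaves given its docstring; the traversal order, encoding, and missing-directory handling are assumptions, not the repository's actual implementation.

```python
from pathlib import Path

def scan_test_files(tests_dir="tests/base"):
    # Collect every .txt prompt under tests_dir (recursively) as
    # {'path': <path relative to tests_dir>, 'prompt': <file content>}.
    test_files = []
    tests_path = Path(tests_dir)
    if not tests_path.is_dir():
        return test_files
    for txt_file in sorted(tests_path.rglob("*.txt")):
        test_files.append({
            "path": str(txt_file.relative_to(tests_path)),
            "prompt": txt_file.read_text(encoding="utf-8"),
        })
    return test_files
```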

View File

@ -0,0 +1,256 @@
# I. Before release
## A. Initial update / migration Data check
Before testing, set up the following in the old version to make sure that the data is properly migrated:
- [x] Change the appearance / theme to something that is obviously different from the default setup ✅
- [x] Ensure there are a few chat threads 🔥🔥🔥
- [x] Ensure there are a few favourite / starred threads 🔥🔥🔥
- [x] Ensure there are 2 models downloaded 🔥🔥
- [x] Ensure there are 2 imported models on the local provider (llama.cpp)
- [x] Modify the MCP servers list and add some ENV values to the MCP servers
- [x] Modify the Local API Server ✅
- [x] Modify the HTTPS proxy config value ✅
- [x] Add 2 custom assistants to Jan ✅
- [x] Create a new chat with the custom assistant 🔥🔥🔥
- [x] Change the `App Data` location to some other folder
- [x] Create a Custom Provider 🔥🔥 (Not verified yet)
- [x] Disable some model providers ✅ (Intermittently works)
#### Validate that the update does not corrupt existing user data or settings (before and after the update, the same information is shown):
- [x] Threads
- [x] Previously used models and assistants are shown correctly
- [x] Can resume chat in threads with the previous context
- [x] Assistants
- Settings:
- [x] Appearance
- [x] MCP Servers
- [x] Local API Server
- [x] HTTPS Proxy
- [x] Custom Provider Set-up
#### In `Hub`:
- [x] Can see models from HF listed properly ✅
- [x] Downloaded models will show `Use` instead of `Download`
- [x] Toggling on `Downloaded` in the right corner shows the correct list of downloaded models 🔥🔥
#### In `Settings -> General`:
- [x] Ensure the `App Data` path is the same ✅
- [x] Click `Open Logs`, the App Log will show ✅
#### In `Settings -> Model Providers`:
- [x] Llama.cpp still lists downloaded models and the user can chat with them 🔥🔥🔥
- [x] Llama.cpp still lists imported models and the user can chat with them
- [x] Remote providers still retain previously set-up API keys and the user can chat with models from the provider without having to re-enter API keys
- [x] Enabled and disabled model providers stay the same as before the update 🔥
#### In `Settings -> Extensions`, check that the following exist: ✅
- [x] Conversational ✅
- [x] Jan Assistant ✅
- [x] Download Manager ✅
- [x] llama.cpp Inference Engine ✅
## B. `Settings`
#### In `General`:
- [x] Ensure `Community` links work and point to the correct website 🔥🔥 (scroll-down problem)
- [x] Ensure the `Check for Updates` function detects the correct latest version ✅
- [ ] [ENG] Create a folder with non-standard characters in its title (e.g. Chinese characters) => change the `App Data` location to that folder => test that models are still able to load and run properly.
#### In `Appearance`:
- [x] Toggle between different `Theme` options to check that they change accordingly and that all elements of the UI are legible with the right contrast:
- [x] Light ✅
- [x] Dark ✅
- [x] System (should follow your OS system settings) 🔥🔥🔥
- [x] Change the following values => close the application => re-open the application => ensure that the changes are persisted across sessions:
- [x] Theme ✅
- [x] Font Size ✅
- [x] Window Background 🔥
- [x] App Main View 🔥
- [x] Primary 🔥
- [x] Accent 🔥
- [x] Destructive 🔥
- [x] Chat Width 🔥
- [x] Ensure that when this value is changed, there is no broken UI caused by it 🔥
- [x] Code Block 🔥
- [x] Show Line Numbers 🔥
- [ ] [ENG] Ensure that clicking `Reset` in the `Appearance` section resets it back to the default values 🔥🔥
- [ ] [ENG] Ensure that clicking `Reset` in the `Code Block` section resets it back to the default values 🔥🔥
#### In `Model Providers`:
In `Llama.cpp`:
- [x] After downloading a model from the Hub, the model is listed with the correct name under `Models` 🔥🔥🔥
- [x] Can import a `gguf` model with no error
- [x] Imported models are listed with the correct name under `Models`
- [x] Check that when clicking `delete`, the model is removed from the list 🔥🔥
- [x] A deleted model doesn't appear in the selectable models section in the chat input (even in old threads that used the model previously)
- [x] Ensure that the user can re-import deleted imported models
- [x] Enable `Auto-Unload Old Models` and ensure that only one model can run / start at a time. If two models are running at the time of enabling, both of them will be stopped.
- [x] Disable `Auto-Unload Old Models` and ensure that multiple models can run at the same time.
- [x] Enable `Context Shift` and ensure that context can run for a long time without encountering a memory error. Use the `banana test`: turn on the fetch MCP => ask a local model to fetch and summarize the history of the banana (bananas have a very long history on Wikipedia, it turns out). It should run out of context memory fairly quickly if `Context Shift` is not enabled.
- [x] [New] Ensure that the user can change the Jinja chat template of an individual model and that it doesn't affect the templates of other models
- [x] [New] Ensure that there is a recommended `llama.cpp` for each system and that it works out of the box for users. ✅
In Remote Model Providers:
- [x] Check that the following providers are present:
- [x] OpenAI ✅
- [x] Anthropic ✅
- [x] Cohere ✅
- [x] OpenRouter ✅
- [x] Mistral ✅
- [x] Groq ✅
- [x] Gemini ✅
- [x] Hugging Face ✅
- [x] Models should appear as available in the selectable dropdown in the chat input once some value is entered in the API key field (it can even be a wrong API key).
- [x] Once a valid API key is used, the user can select a model from that provider and chat without any error.
- [x] Delete a model and ensure that it doesn't show up in the `Models` list view or in the selectable dropdown in the chat input.
- [x] Ensure that a deleted model is also not selectable and does not appear in old threads that used it.
- [x] Manually adding a new model works and the user can chat with the newly added model without error (you can add back the model you just deleted for testing)
In Custom Providers:
- [x] Ensure that the user can create a new custom provider with the right baseURL and API key.
- [x] Clicking `Refresh` should retrieve a list of available models from the custom provider.
- [x] User can chat with models from the custom provider
- [x] Ensure that custom providers can be deleted and won't reappear in a new session
In general:
- [ ] A disabled model provider should not show up as selectable in the chat input of new and old threads alike (an old thread's chat input should show `Select Model` instead of the disabled model)
#### In `Shortcuts`:
Make sure the following shortcut key combos are visible and work:
- [x] New chat ✅
- [x] Toggle Sidebar ✅
- [x] Zoom In ✅
- [x] Zoom Out ✅
- [x] Send Message ✅
- [x] New Line ✅
- [x] Navigation ✅
#### In `Hardware`:
Ensure that the following sections of hardware information show up:
- [x] Operating System ✅
- [x] CPU ✅
- [x] Memory ✅
- [x] GPU (If the machine has one) ✅
- [x] Enable and disable GPUs and ensure that models still run correctly in both modes
- [x] Enabling or disabling the GPU should not affect the UI of the application
#### In `MCP Servers`:
- [x] Ensure that enabling `Experimental Features` under `Advanced` in `General` makes `MCP Servers` appear in the `Settings` list. ✅
- [x] Disabling `Experimental Features` should also disable all the tools so they do not show up in the chat input (note: tools still show up in the chat input)
- [x] Ensure that a user can create an MCP server successfully when entering the correct information
- [x] Ensure that `Env` values are masked with `*` in the quick view.
- [x] If an `Env` value is missing, there should be an error pop-up.
- [x] Ensure that a deleted MCP server disappears from the `MCP Servers` list without any error
- [x] Ensure that before an MCP server is deleted, it disables itself first and doesn't appear in the tool list after deletion.
- [x] Ensure that when the content of an MCP server is edited, the change is updated and reflected accordingly in the UI and when running it.
- [x] Toggling an MCP server between enabled and disabled works properly
- [x] A disabled MCP server should not appear in the available tool list in the chat input
- [x] A disabled MCP server should not be callable even when the model is prompted to force a call (ensure there is no ghost MCP server)
- [x] Ensure that enabled MCP servers start automatically when the application starts
- [x] An enabled MCP server should show its functions in the available tool list
- [x] The user can use a model and call different tools from multiple enabled MCP servers in the same thread
- [x] If `Allow All MCP Tool Permissions` is disabled, in every new thread, before a tool is called, a confirmation dialog should pop up to confirm the action.
- [x] When the user clicks `Deny`, the tool call is not executed and a message indicating so is returned in the tool call result.
- [x] When the user clicks `Allow Once` on the pop-up, a confirmation dialog appears again the next time the tool is called.
- [x] When the user clicks `Always Allow` on the pop-up, the tool retains permission and won't ask for confirmation again. (This applies at the individual tool level, not at the MCP server level.)
- [x] If `Allow All MCP Tool Permissions` is enabled, in every new thread, no confirmation dialog should pop up when a tool is called.
- [x] [Windows OS] When an MCP tool is called, there is no terminal window pop-up or any flashing window.
- [x] When the pop-up appears, make sure that the `Tool Parameters` are also shown in detail in the pop-up.
#### In `Local API Server`:
- [x] User can `Start Server` and chat with the default endpoint
- [x] User should see the correct model name at `v1/models`
- [x] User should be able to chat with it at `v1/chat/completions` (see the example requests after this section)
- [x] `Open Logs` shows the correct query log sent to the server and returned from the server ✅
- [x] Make sure that changing each parameter in `Server Configuration` is reflected when clicking `Start Server`
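A quick way to exercise the two endpoints above from outside the UI. The base URL below assumes the host, port, and API prefix shown in `Server Configuration` on a stock install, and the OpenAI-style response shape is an assumption as well; adjust both to your configuration.

```python
import requests

BASE_URL = "http://127.0.0.1:1337/v1"  # host, port, and API prefix from Server Configuration
API_KEY = "1234"                       # the value entered in the API Key field
HEADERS = {"Authorization": f"Bearer {API_KEY}"}

# v1/models should list the models Jan is currently serving
models = requests.get(f"{BASE_URL}/models", headers=HEADERS, timeout=30)
models.raise_for_status()
print(models.json())

# v1/chat/completions should return a reply from one of those models
payload = {
    "model": models.json()["data"][0]["id"],  # assumes an OpenAI-style model listing
    "messages": [{"role": "user", "content": "Reply with a single short sentence."}],
}
chat = requests.post(f"{BASE_URL}/chat/completions", headers=HEADERS, json=payload, timeout=120)
chat.raise_for_status()
print(chat.json()["choices"][0]["message"]["content"])
```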
#### In `HTTPS Proxy`:
- [ ] Model download requests go through the proxy endpoint (a proxy-logging sketch follows this item)
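One low-tech way to check the item above: point Jan's `HTTPS Proxy` setting at a throwaway local listener and watch whether download traffic shows up there. The sketch below only logs the first request line of each connection and does not forward anything, so downloads will fail while it runs; that is enough to confirm the requests are routed through the proxy. The port and the overall approach are assumptions, not part of Jan.

```python
import socket
import threading

def handle(conn, addr):
    # Log only the first request line (e.g. "CONNECT some-host:443 HTTP/1.1").
    try:
        first_line = conn.recv(4096).split(b"\r\n", 1)[0]
        if first_line:
            print(f"[proxy] {addr[0]} -> {first_line.decode(errors='replace')}")
    finally:
        conn.close()

def main(host="127.0.0.1", port=8899):
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as srv:
        srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        srv.bind((host, port))
        srv.listen()
        print(f"[proxy] listening on {host}:{port} - start a model download in Jan now")
        while True:
            conn, addr = srv.accept()
            threading.Thread(target=handle, args=(conn, addr), daemon=True).start()

if __name__ == "__main__":
    main()
```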
## C. Hub
- [x] User can click `Download` to download a model ✅
- [x] User can cancel a model download while it is in progress 🔥🔥🔥
- [x] User can add a Hugging Face model detail to the list by pasting a model name / model url into the search bar and press enter ✅
- [x] Clicking on a listing will open up the model card information within Jan and render the HTML properly
- [x] Clicking download works in the `Show variants` section ✅
- [x] Clicking download works inside the Model card HTML ✅
## D. Threads
#### In the left bar:
- [x] User can delete an old thread, and it won't reappear even when the app restarts
- [x] Changing the title of a thread should update its last-modified date and re-position it in the correct chronological order in the left bar.
- [x] The title of a new thread is the first message from the user.
- [x] Users can star / un-star threads accordingly
- [x] Starred threads should move to the `Favourite` section and other threads should stay in `Recent`
- [x] Ensure that the thread search feature returns accurate results based on thread titles and contents (including from both `Favourite` and `Recent`)
- [x] `Delete All` should delete only threads in the `Recent` section
- [x] `Unstar All` should un-star all of the `Favourite` threads and return them to `Recent`
#### In a thread:
- [x] When `New Chat` is clicked, the assistant is set to the last selected assistant, the model is set to the last used model, and the user can immediately chat with the model.
- [x] User can conduct multi-turn conversations in a single thread without loss of data (given that `Context Shift` is not enabled)
- [x] User can change to a different model in the middle of a conversation in a thread and the model works.
- [x] User can click the `Regenerate` button on a returned message from the model to get a new response based on the previous context.
- [x] User can change the `Assistant` in the middle of a conversation in a thread and the new assistant's settings will be applied instead.
- [x] The chat window can render and show all the content of a selected thread (including scrolling up and down in long threads)
- [x] Old threads retain their settings as of the last update / usage
- [x] Assistant option
- [x] Model option (except if the model / model provider has been deleted or disabled)
- [x] User can send messages with different types of text content (e.g. text, emoji, ...)
- [x] When requesting the model to generate a markdown table, the table is correctly formatted as returned from the model.
- [x] When the model generates code, ensure that the code snippets are properly formatted according to the `Appearance -> Code Block` setting.
- [x] Users can edit their old messages and regenerate the answer based on the new message
- [x] User can click `Copy` to copy the model response
- [x] User can click `Delete` to delete either the user message or the model response.
- [x] The token speed appears while a response from the model is being generated and the final value is shown under the response.
- [ ] [New] Make sure that when a user types Chinese or Japanese characters with an IME keyboard and presses `Enter`, the `Send` button doesn't trigger automatically after each word.
## E. Assistants
- [x] There is always at least one default Assistant which is Jan ✅
- [x] The default Jan assistant has `stream = True` by default
- [x] User can create / edit a new assistant with different parameter and instruction choices. 🔥
- [x] When the user deletes the default Assistant, the next Assistant in line becomes the default Assistant and its settings apply to new chats accordingly.
- [x] User can create / edit an assistant from within a chat window (at the top left)
## F. After checking everything else
In `Settings -> General`:
- [x] Change the location of the `App Data` to some other path that is not the default path
- [x] Click on the `Reset` button in `Other` to factory reset the app:
- [x] All threads deleted
- [x] All Assistants deleted except for the default Jan Assistant
- [x] `App Data` location is reset back to default path
- [x] Appearance reset
- [x] Model Providers information all reset
- [x] Llama.cpp setting reset
- [x] API keys cleared
- [x] All Custom Providers deleted
- [x] Shortcuts reset
- [x] MCP Servers reset
- [x] Local API Server reset
- [x] HTTPS Proxy reset
- [x] After closing the app, all models are unloaded properly
- [x] Locate the data folder using the `App Data` path information => delete the folder => reopen the app to check that the folder is re-created with all the necessary data.
- [x] Ensure that the uninstallation process removes the app successfully from the system.
## G. New App Installation
- [x] Clean up by deleting all the leftover folders created by Jan (a cleanup sketch follows this section)
- [ ] On MacOS
- [ ] `~/Library/Application Support/Jan`
- [ ] `~/Library/Caches/jan.ai.app`
- [x] On Windows
- [x] `C:\Users\<Username>\AppData\Roaming\Jan\`
- [x] `C:\Users\<Username>\AppData\Local\jan.ai.app`
- [ ] On Linux
- [ ] `~/.cache/Jan`
- [ ] `~/.cache/jan.ai.app`
- [ ] `~/.local/share/Jan`
- [ ] `~/.local/share/jan.ai.app`
- [x] Ensure that the fresh install of Jan launches
- [x] Do some basic checks to see that all functions still behave as expected. To be extra careful, you can go through the whole list again; however, it is usually enough to verify that core functionality like `Threads` and `Model Providers` works as intended.
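For the cleanup item at the top of this section, here is a throwaway sketch that removes the leftover folders listed above. The paths are taken directly from the checklist; double-check them for your platform before running, since this permanently deletes Jan's data.

```python
import shutil
import sys
from pathlib import Path

HOME = Path.home()

# Leftover folders from the checklist above, keyed by sys.platform.
LEFTOVERS = {
    "darwin": [
        HOME / "Library/Application Support/Jan",
        HOME / "Library/Caches/jan.ai.app",
    ],
    "win32": [
        HOME / "AppData/Roaming/Jan",
        HOME / "AppData/Local/jan.ai.app",
    ],
    "linux": [
        HOME / ".cache/Jan",
        HOME / ".cache/jan.ai.app",
        HOME / ".local/share/Jan",
        HOME / ".local/share/jan.ai.app",
    ],
}

for path in LEFTOVERS.get(sys.platform, []):
    if path.exists():
        print(f"removing  {path}")
        shutil.rmtree(path, ignore_errors=True)
    else:
        print(f"not found {path}")
```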
# II. After release
- [ ] Check that the App Updater works and the user can update to the latest release without any problem
- [ ] App restarts after the user finishes an update
- [ ] Repeat section `A. Initial update / migration Data check` above to verify that the update is done correctly on the live version