From 8ca0e98e576708afb1bf3bdf579d7ecbaf92e054 Mon Sep 17 00:00:00 2001 From: Minh141120 Date: Fri, 15 Aug 2025 09:57:44 +0700 Subject: [PATCH] feat: add migration testing [WIP] --- autoqa/COMMAND_REFERENCE.md | 382 +++++++++++++++ autoqa/MIGRATION_TESTING.md | 370 ++++++++++++++ autoqa/QUICK_START.md | 213 ++++++++ autoqa/batch_migration_runner.py | 413 ++++++++++++++++ autoqa/individual_migration_runner.py | 462 ++++++++++++++++++ autoqa/reportportal_handler.py | 111 ++++- autoqa/tests/base/default-jan-assistant.txt | 19 + autoqa/tests/base/enable-mcp-server.txt | 22 + autoqa/tests/base/extensions.txt | 22 + autoqa/tests/base/hardware-info.txt | 60 +++ autoqa/tests/base/providers-available.txt | 22 + .../user-start-chatting.txt} | 2 +- .../assistants/setup-chat-with-assistant.txt | 46 ++ .../assistants/setup-create-assistants.txt | 53 ++ ...verify-chat-with-assistant-persistence.txt | 37 ++ .../verify-create-assistant-persistence.txt | 48 ++ .../models/setup-download-models.txt | 51 ++ .../models/verify-model-persistence.txt | 39 ++ 18 files changed, 2344 insertions(+), 28 deletions(-) create mode 100644 autoqa/COMMAND_REFERENCE.md create mode 100644 autoqa/MIGRATION_TESTING.md create mode 100644 autoqa/QUICK_START.md create mode 100644 autoqa/batch_migration_runner.py create mode 100644 autoqa/individual_migration_runner.py create mode 100644 autoqa/tests/base/default-jan-assistant.txt create mode 100644 autoqa/tests/base/enable-mcp-server.txt create mode 100644 autoqa/tests/base/extensions.txt create mode 100644 autoqa/tests/base/hardware-info.txt create mode 100644 autoqa/tests/base/providers-available.txt rename autoqa/tests/{new-user/1-user-start-chatting.txt => base/user-start-chatting.txt} (99%) create mode 100644 autoqa/tests/migration/assistants/setup-chat-with-assistant.txt create mode 100644 autoqa/tests/migration/assistants/setup-create-assistants.txt create mode 100644 autoqa/tests/migration/assistants/verify-chat-with-assistant-persistence.txt create mode 100644 autoqa/tests/migration/assistants/verify-create-assistant-persistence.txt create mode 100644 autoqa/tests/migration/models/setup-download-models.txt create mode 100644 autoqa/tests/migration/models/verify-model-persistence.txt diff --git a/autoqa/COMMAND_REFERENCE.md b/autoqa/COMMAND_REFERENCE.md new file mode 100644 index 000000000..5978f31a8 --- /dev/null +++ b/autoqa/COMMAND_REFERENCE.md @@ -0,0 +1,382 @@ +# AutoQA Command Reference + +πŸ“š Complete reference for all AutoQA command line arguments and options. + +## Command Line Arguments + +### Basic Syntax + +```bash +python main.py [OPTIONS] +``` + +### Argument Groups + +Arguments are organized into logical groups for easier understanding and usage. 
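+The tables below pair each command-line flag with an environment variable and a default. As a rough mental model only (not necessarily how `main.py` is actually wired), each option can be thought of as an argparse argument with an environment-variable fallback: an explicit flag overrides the variable, which in turn overrides the built-in default. A minimal sketch, using option names taken from the tables below:
+
+```python
+# Illustrative sketch of flag -> env var -> default resolution; the option names
+# come from this reference, but the real main.py implementation may differ.
+import argparse
+import os
+
+def env_default(var: str, default: str) -> str:
+    """Use the environment variable when set, otherwise the documented default."""
+    return os.environ.get(var, default)
+
+parser = argparse.ArgumentParser(description="AutoQA runner (illustrative only)")
+parser.add_argument(
+    "--skip-server-start",
+    action="store_true",
+    default=env_default("SKIP_SERVER_START", "false").lower() == "true",
+    help="Skip automatic computer server startup",
+)
+parser.add_argument(
+    "--max-turns",
+    type=int,
+    default=int(env_default("MAX_TURNS", "30")),
+    help="Maximum number of turns per test",
+)
+parser.add_argument(
+    "--tests-dir",
+    default=env_default("TESTS_DIR", "tests"),
+    help="Directory containing test files",
+)
+
+args = parser.parse_args()
+print(args.skip_server_start, args.max_turns, args.tests_dir)
+```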
+ +## Computer Server Configuration + +| Argument | Environment Variable | Default | Description | +|----------|---------------------|---------|-------------| +| `--skip-server-start` | `SKIP_SERVER_START` | `false` | Skip automatic computer server startup | + +**Examples:** +```bash +# Auto-start computer server (default) +python main.py + +# Use external computer server +python main.py --skip-server-start + +# Using environment variable +SKIP_SERVER_START=true python main.py +``` + +## ReportPortal Configuration + +| Argument | Environment Variable | Default | Description | +|----------|---------------------|---------|-------------| +| `--enable-reportportal` | `ENABLE_REPORTPORTAL` | `false` | Enable ReportPortal integration | +| `--rp-endpoint` | `RP_ENDPOINT` | `https://reportportal.menlo.ai` | ReportPortal endpoint URL | +| `--rp-project` | `RP_PROJECT` | `default_personal` | ReportPortal project name | +| `--rp-token` | `RP_TOKEN` | - | ReportPortal API token (required when RP enabled) | +| `--launch-name` | `LAUNCH_NAME` | - | Custom launch name for ReportPortal | + +**Examples:** +```bash +# Basic ReportPortal integration +python main.py --enable-reportportal --rp-token "YOUR_TOKEN" + +# Full ReportPortal configuration +python main.py \ + --enable-reportportal \ + --rp-endpoint "https://reportportal.example.com" \ + --rp-project "my_project" \ + --rp-token "YOUR_TOKEN" \ + --launch-name "Custom Test Run" + +# Using environment variables +ENABLE_REPORTPORTAL=true RP_TOKEN=secret python main.py +``` + +## Jan Application Configuration + +| Argument | Environment Variable | Default | Description | +|----------|---------------------|---------|-------------| +| `--jan-app-path` | `JAN_APP_PATH` | auto-detected | Path to Jan application executable | +| `--jan-process-name` | `JAN_PROCESS_NAME` | platform-specific | Jan process name for monitoring | + +**Platform-specific defaults:** +- **Windows**: `Jan.exe` +- **macOS**: `Jan` +- **Linux**: `Jan-nightly` + +**Examples:** +```bash +# Custom Jan app path +python main.py --jan-app-path "C:/Custom/Path/Jan.exe" + +# Custom process name +python main.py --jan-process-name "Jan-nightly.exe" + +# Using environment variable +JAN_APP_PATH="D:/Apps/Jan/Jan.exe" python main.py +``` + +## Model Configuration + +| Argument | Environment Variable | Default | Description | +|----------|---------------------|---------|-------------| +| `--model-loop` | `MODEL_LOOP` | `uitars` | Agent loop type | +| `--model-provider` | `MODEL_PROVIDER` | `oaicompat` | Model provider | +| `--model-name` | `MODEL_NAME` | `ByteDance-Seed/UI-TARS-1.5-7B` | AI model name | +| `--model-base-url` | `MODEL_BASE_URL` | `http://10.200.108.58:1234/v1` | Model API endpoint | + +**Examples:** +```bash +# OpenAI GPT-4 +python main.py \ + --model-provider "openai" \ + --model-name "gpt-4" \ + --model-base-url "https://api.openai.com/v1" + +# Anthropic Claude +python main.py \ + --model-provider "anthropic" \ + --model-name "claude-3-sonnet-20240229" \ + --model-base-url "https://api.anthropic.com" + +# Custom local model +python main.py \ + --model-name "my-custom-model" \ + --model-base-url "http://localhost:8000/v1" + +# Using environment variables +MODEL_NAME=gpt-4 MODEL_BASE_URL=https://api.openai.com/v1 python main.py +``` + +## Test Execution Configuration + +| Argument | Environment Variable | Default | Description | +|----------|---------------------|---------|-------------| +| `--max-turns` | `MAX_TURNS` | `30` | Maximum number of turns per test | +| `--tests-dir` | 
`TESTS_DIR` | `tests` | Directory containing test files | +| `--delay-between-tests` | `DELAY_BETWEEN_TESTS` | `3` | Delay between tests (seconds) | + +**Examples:** +```bash +# Increase turn limit +python main.py --max-turns 50 + +# Custom test directory +python main.py --tests-dir "my_tests" + +# Longer delay between tests +python main.py --delay-between-tests 10 + +# Using environment variables +MAX_TURNS=50 DELAY_BETWEEN_TESTS=5 python main.py +``` + +## Migration Testing Arguments + +**Note**: These arguments are planned for future implementation based on your sample commands. + +| Argument | Environment Variable | Default | Description | +|----------|---------------------|---------|-------------| +| `--enable-migration-test` | `ENABLE_MIGRATION_TEST` | `false` | Enable migration testing mode | +| `--migration-test-case` | `MIGRATION_TEST_CASE` | - | Specific migration test case to run | +| `--migration-batch-mode` | `MIGRATION_BATCH_MODE` | `false` | Use batch mode for migration tests | +| `--old-version` | `OLD_VERSION` | - | Path to old version installer | +| `--new-version` | `NEW_VERSION` | - | Path to new version installer | + +**Examples:** +```bash +# Basic migration test +python main.py \ + --enable-migration-test \ + --migration-test-case "assistants" \ + --old-version "C:\path\to\old\installer.exe" \ + --new-version "C:\path\to\new\installer.exe" + +# Batch mode migration test +python main.py \ + --enable-migration-test \ + --migration-test-case "assistants-complete" \ + --migration-batch-mode \ + --old-version "C:\path\to\old\installer.exe" \ + --new-version "C:\path\to\new\installer.exe" + +# Using environment variables +ENABLE_MIGRATION_TEST=true \ +MIGRATION_TEST_CASE=assistants \ +OLD_VERSION="C:\path\to\old.exe" \ +NEW_VERSION="C:\path\to\new.exe" \ +python main.py +``` + +## Complete Command Examples + +### Basic Testing + +```bash +# Run all tests with defaults +python main.py + +# Run specific test category +python main.py --tests-dir "tests/base" + +# Custom configuration +python main.py \ + --max-turns 50 \ + --model-name "gpt-4" \ + --model-base-url "https://api.openai.com/v1" \ + --tests-dir "tests/base" +``` + +### Migration Testing + +```bash +# Simple migration test +python main.py \ + --enable-migration-test \ + --migration-test-case "assistants" \ + --old-version "Jan_0.6.6.exe" \ + --new-version "Jan_0.6.7.exe" \ + --max-turns 65 + +# Complete migration test with ReportPortal +python main.py \ + --enable-migration-test \ + --migration-test-case "assistants-complete" \ + --migration-batch-mode \ + --old-version "Jan_0.6.6.exe" \ + --new-version "Jan_0.6.7.exe" \ + --max-turns 75 \ + --enable-reportportal \ + --rp-token "YOUR_TOKEN" \ + --rp-project "jan_migration_tests" +``` + +### Advanced Configuration + +```bash +# Full custom configuration +python main.py \ + --skip-server-start \ + --enable-reportportal \ + --rp-endpoint "https://custom.rp.com" \ + --rp-project "jan_tests" \ + --rp-token "YOUR_TOKEN" \ + --jan-app-path "C:/Custom/Jan/Jan.exe" \ + --jan-process-name "Jan-custom.exe" \ + --model-provider "openai" \ + --model-name "gpt-4-turbo" \ + --model-base-url "https://api.openai.com/v1" \ + --max-turns 100 \ + --tests-dir "custom_tests" \ + --delay-between-tests 5 +``` + +## Environment Variables Summary + +### Computer Server +- `SKIP_SERVER_START`: Skip auto computer server startup + +### ReportPortal +- `ENABLE_REPORTPORTAL`: Enable ReportPortal integration +- `RP_ENDPOINT`: ReportPortal endpoint URL +- `RP_PROJECT`: ReportPortal project name +- 
`RP_TOKEN`: ReportPortal API token +- `LAUNCH_NAME`: Custom launch name + +### Jan Application +- `JAN_APP_PATH`: Path to Jan executable +- `JAN_PROCESS_NAME`: Jan process name + +### Model Configuration +- `MODEL_LOOP`: Agent loop type +- `MODEL_PROVIDER`: Model provider +- `MODEL_NAME`: AI model name +- `MODEL_BASE_URL`: Model API endpoint + +### Test Execution +- `MAX_TURNS`: Maximum turns per test +- `TESTS_DIR`: Test files directory +- `DELAY_BETWEEN_TESTS`: Delay between tests + +### Migration Testing (Planned) +- `ENABLE_MIGRATION_TEST`: Enable migration mode +- `MIGRATION_TEST_CASE`: Migration test case +- `MIGRATION_BATCH_MODE`: Use batch mode +- `OLD_VERSION`: Old installer path +- `NEW_VERSION`: New installer path + +## Help and Information + +### Get Help +```bash +# Show all available options +python main.py --help + +# Show help for specific section +python main.py --help | grep -A 10 "Migration" +``` + +### Version Information +```bash +# Check Python version +python --version + +# Check AutoQA installation +python -c "import autoqa; print(autoqa.__version__)" +``` + +### Debug Information +```bash +# Enable debug logging +export LOG_LEVEL=DEBUG +export PYTHONPATH=. + +# Run with verbose output +python main.py --enable-migration-test ... +``` + +## Best Practices + +### 1. Use Environment Variables +```bash +# Set common configuration +export MAX_TURNS=65 +export MODEL_NAME="gpt-4" +export JAN_APP_PATH="C:\path\to\Jan.exe" + +# Use in commands +python main.py --max-turns "$MAX_TURNS" +``` + +### 2. Combine Arguments Logically +```bash +# Group related arguments +python main.py \ + --enable-migration-test \ + --migration-test-case "assistants" \ + --old-version "old.exe" \ + --new-version "new.exe" \ + --max-turns 65 \ + --enable-reportportal \ + --rp-token "token" +``` + +### 3. Use Absolute Paths +```bash +# Windows +--old-version "C:\Users\username\Downloads\Jan_0.6.6.exe" + +# Linux/macOS +--old-version "/home/user/downloads/Jan_0.6.6.deb" +``` + +### 4. Test Incrementally +```bash +# Start simple +python main.py + +# Add migration +python main.py --enable-migration-test ... + +# Add ReportPortal +python main.py --enable-migration-test ... --enable-reportportal ... +``` + +## Troubleshooting Commands + +### Check Dependencies +```bash +# Verify Python packages +pip list | grep -E "(autoqa|computer|agent)" + +# Check imports +python -c "import computer, agent, autoqa; print('All imports successful')" +``` + +### Check Configuration +```bash +# Validate arguments +python main.py --help + +# Test specific configuration +python main.py --jan-app-path "nonexistent" 2>&1 | grep "not found" +``` + +### Debug Mode +```bash +# Enable debug logging +export LOG_LEVEL=DEBUG +export PYTHONPATH=. + +# Run with debug output +python main.py --enable-migration-test ... +``` + +For more detailed information, see [MIGRATION_TESTING.md](MIGRATION_TESTING.md), [QUICK_START.md](QUICK_START.md), and [README.md](README.md). diff --git a/autoqa/MIGRATION_TESTING.md b/autoqa/MIGRATION_TESTING.md new file mode 100644 index 000000000..ba7750f70 --- /dev/null +++ b/autoqa/MIGRATION_TESTING.md @@ -0,0 +1,370 @@ +# AutoQA Migration Testing Guide + +πŸš€ Comprehensive guide for running migration tests with AutoQA to verify data persistence across Jan application upgrades. + +## Table of Contents + +1. [Overview](#overview) +2. [Prerequisites](#prerequisites) +3. [Basic Workflow (Base Test Cases)](#basic-workflow-base-test-cases) +4. [Migration Testing](#migration-testing) +5. 
[Migration Test Cases](#migration-test-cases) +6. [Running Migration Tests](#running-migration-tests) +7. [Advanced Configuration](#advanced-configuration) +8. [Troubleshooting](#troubleshooting) +9. [Examples](#examples) + +## Overview + +AutoQA provides comprehensive testing capabilities for the Jan application, including: + +- **Base Test Cases**: Standard functionality testing (assistants, models, extensions, etc.) +- **Migration Testing**: Verify data persistence and functionality across application upgrades +- **Batch Mode**: Run multiple test phases efficiently +- **Screen Recording**: Capture test execution for debugging +- **ReportPortal Integration**: Upload test results and artifacts + +## Prerequisites + +Before running migration tests, ensure you have: + +1. **Python Environment**: Python 3.8+ with required packages +2. **Jan Installers**: Both old and new version installers +3. **Test Environment**: Clean system or virtual machine +4. **Dependencies**: All AutoQA requirements installed + +```bash +# Install dependencies +pip install -r requirements.txt + +``` + +## Basic Workflow (Base Test Cases) + +### Running Standard Tests + +Base test cases verify core Jan functionality without version upgrades: + +```bash +# Run all base tests +python main.py + +# Run specific test directory +python main.py --tests-dir "tests/base" + +# Run with custom configuration +python main.py \ + --max-turns 50 +``` + +### Available Base Test Cases + +| Test Case | File | Description | +|-----------|------|-------------| +| Default Assistant | `tests/base/default-jan-assistant.txt` | Verify Jan default assistant exists | +| Extensions | `tests/base/extensions.txt` | Check available extensions | +| Hardware Info | `tests/base/hardware-info.txt` | Verify hardware information display | +| Model Providers | `tests/base/providers-available.txt` | Check model provider availability | +| User Chat | `tests/base/user-start-chatting.txt` | Test basic chat functionality | +| MCP Server | `tests/base/enable-mcp-server.txt` | Test experimental features | + +## Migration Testing + +Migration testing verifies that user data and configurations persist correctly when upgrading Jan from one version to another. + +### Migration Test Flow + +``` +1. Install OLD version β†’ Run SETUP tests +2. Install NEW version β†’ Run VERIFICATION tests +3. 
Compare results and verify persistence +``` + +### Migration Test Approaches + +#### Individual Mode +- Runs one test case at a time +- More granular debugging +- Better for development and troubleshooting + +#### Batch Mode +- Runs all setup tests first, then upgrades, then all verification tests +- More realistic user experience +- Faster execution for multiple test cases + +## Migration Test Cases + +### Available Migration Test Cases + +| Test Case Key | Name | Description | Setup Tests | Verification Tests | +|---------------|------|-------------|-------------|-------------------| +| `models` | Model Downloads Migration | Tests downloaded models persist after upgrade | `models/setup-download-models.txt` | `models/verify-model-persistence.txt` | +| `assistants` | Custom Assistants Migration | Tests custom assistants persist after upgrade | `assistants/setup-create-assistants.txt` | `assistants/verify-create-assistant-persistence.txt` | +| `assistants-complete` | Complete Assistants Migration | Tests both creation and chat functionality | Multiple setup tests | Multiple verification tests | + +### Test Case Details + +#### Models Migration Test +- **Setup**: Downloads models, configures settings, tests functionality +- **Verification**: Confirms models persist, settings maintained, functionality intact + +#### Assistants Migration Test +- **Setup**: Creates custom assistants with specific configurations +- **Verification**: Confirms assistants persist with correct metadata and settings + +#### Assistants Complete Migration Test +- **Setup**: Creates assistants AND tests chat functionality +- **Verification**: Confirms both creation and chat data persist correctly + +## Running Migration Tests + +### Basic Migration Test Command + +```bash +python main.py \ + --enable-migration-test \ + --migration-test-case "assistants" \ + --old-version "path/to/old/installer.exe" \ + --new-version "path/to/new/installer.exe" \ + --max-turns 65 +``` + +### Batch Mode Migration Test + +```bash +python main.py \ + --enable-migration-test \ + --migration-test-case "assistants-complete" \ + --migration-batch-mode \ + --old-version "path/to/old/installer.exe" \ + --new-version "path/to/new/installer.exe" \ + --max-turns 75 +``` + +### Command Line Arguments + +| Argument | Description | Required | Example | +|----------|-------------|----------|---------| +| `--enable-migration-test` | Enable migration testing mode | Yes | `--enable-migration-test` | +| `--migration-test-case` | Specific test case to run | Yes | `--migration-test-case "assistants"` | +| `--migration-batch-mode` | Use batch mode for multiple tests | No | `--migration-batch-mode` | +| `--old-version` | Path to old version installer | Yes | `--old-version "C:\path\to\old.exe"` | +| `--new-version` | Path to new version installer | Yes | `--new-version "C:\path\to\new.exe"` | +| `--max-turns` | Maximum turns per test phase | No | `--max-turns 75` | + +### Environment Variables + +You can also use environment variables for cleaner commands: + +```bash +# Set environment variables +export OLD_VERSION="C:\path\to\old\installer.exe" +export NEW_VERSION="C:\path\to\new\installer.exe" +export MIGRATION_TEST_CASE="assistants" +export MAX_TURNS=65 + +# Run with environment variables +python main.py \ + --enable-migration-test \ + --migration-test-case "$MIGRATION_TEST_CASE" \ + --old-version "$OLD_VERSION" \ + --new-version "$NEW_VERSION" \ + --max-turns "$MAX_TURNS" +``` + +## Advanced Configuration + +### Custom Model Configuration + +```bash +python 
main.py \ + --enable-migration-test \ + --migration-test-case "assistants" \ + --old-version "path/to/old.exe" \ + --new-version "path/to/new.exe" \ + --model-name "gpt-4" \ + --model-provider "openai" \ + --model-base-url "https://api.openai.com/v1" \ + --max-turns 80 +``` + +### ReportPortal Integration + +```bash +python main.py \ + --enable-migration-test \ + --migration-test-case "assistants" \ + --old-version "path/to/old.exe" \ + --new-version "path/to/new.exe" \ + --enable-reportportal \ + --rp-token "YOUR_TOKEN" \ + --rp-project "jan_migration_tests" \ + --max-turns 65 +``` + +### Custom Test Directory + +```bash +python main.py \ + --enable-migration-test \ + --migration-test-case "assistants" \ + --old-version "path/to/old.exe" \ + --new-version "path/to/new.exe" \ + --tests-dir "custom_tests" \ + --max-turns 65 +``` + +## Examples + +### Example 1: Basic Assistants Migration Test + +```bash +# Test custom assistants persistence +python main.py \ + --enable-migration-test \ + --migration-test-case "assistants" \ + --old-version "C:\Users\ziczac computer\Downloads\Jan_0.6.6_x64-setup.exe" \ + --new-version "C:\Users\ziczac computer\Downloads\Jan_0.6.7_x64-setup.exe" \ + --max-turns 65 +``` + +**What this does:** +1. Installs Jan 0.6.6 +2. Creates custom assistants (Python Tutor, Creative Writer) +3. Upgrades to Jan 0.6.7 +4. Verifies assistants persist with correct settings + +### Example 2: Complete Assistants Migration (Batch Mode) + +```bash +# Test both creation and chat functionality +python main.py \ + --enable-migration-test \ + --migration-test-case "assistants-complete" \ + --migration-batch-mode \ + --old-version "C:\Users\ziczac computer\Downloads\Jan_0.6.6_x64-setup.exe" \ + --new-version "C:\Users\ziczac computer\Downloads\Jan_0.6.7_x64-setup.exe" \ + --max-turns 75 +``` + +**What this does:** +1. Installs Jan 0.6.6 +2. Creates custom assistants +3. Tests chat functionality with assistants +4. Upgrades to Jan 0.6.7 +5. Verifies both creation and chat data persist + +### Example 3: Models Migration Test + +```bash +# Test model downloads and settings persistence +python main.py \ + --enable-migration-test \ + --migration-test-case "models" \ + --old-version "C:\Users\ziczac computer\Downloads\Jan_0.6.6_x64-setup.exe" \ + --new-version "C:\Users\ziczac computer\Downloads\Jan_0.6.7_x64-setup.exe" \ + --max-turns 60 +``` + +**What this does:** +1. Installs Jan 0.6.6 +2. Downloads models (jan-nano-gguf, gemma-2-2b-instruct-gguf) +3. Configures model settings +4. Upgrades to Jan 0.6.7 +5. Verifies models persist and settings maintained + +## Troubleshooting + +### Common Issues + +#### 1. Installer Path Issues +```bash +# Use absolute paths with proper escaping +--old-version "C:\Users\ziczac computer\Downloads\Jan_0.6.6_x64-setup.exe" +--new-version "C:\Users\ziczac computer\Downloads\Jan_0.6.7_x64-setup.exe" +``` + +#### 2. Turn Limit Too Low +```bash +# Increase max turns for complex tests +--max-turns 75 # Instead of default 30 +``` + +#### 3. Test Case Not Found +```bash +# Verify test case key exists +--migration-test-case "assistants" # Valid: models, assistants, assistants-complete +``` + +#### 4. Permission Issues +```bash +# Run as administrator on Windows +# Use sudo on Linux/macOS for system-wide installations +``` + +### Debug Mode + +Enable detailed logging for troubleshooting: + +```bash +# Set logging level +export PYTHONPATH=. 
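+# PYTHONPATH=. keeps the local autoqa modules importable when running from this
+# directory; the next line raises the logger verbosity to DEBUG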
+export LOG_LEVEL=DEBUG + +# Run with verbose output +python main.py \ + --enable-migration-test \ + --migration-test-case "assistants" \ + --old-version "path/to/old.exe" \ + --new-version "path/to/new.exe" \ + --max-turns 65 +``` + +### Test Results + +Migration tests generate detailed results: + +- **Setup Phase Results**: Success/failure for each setup test +- **Upgrade Results**: Installation success status +- **Verification Phase Results**: Success/failure for each verification test +- **Overall Success**: Combined result from all phases + +### Output Files + +Tests generate several output files: + +- **Trajectories**: `trajectories/` - Agent interaction logs +- **Recordings**: `recordings/` - Screen recordings (MP4) +- **Logs**: Console output with detailed execution information + +## Best Practices + +### 1. Test Environment +- Use clean virtual machines or fresh system installations +- Ensure sufficient disk space for installers and test data +- Close other applications during testing + +### 2. Test Data +- Use realistic test data (assistant names, descriptions, instructions) +- Test with multiple models and configurations +- Verify edge cases and error conditions + +### 3. Execution +- Start with individual mode for debugging +- Use batch mode for production testing +- Monitor system resources during execution + +### 4. Validation +- Verify test results manually when possible +- Check generated artifacts (trajectories, recordings) +- Compare expected vs. actual behavior + +## Next Steps + +1. **Start Simple**: Begin with basic migration tests +2. **Add Complexity**: Gradually test more complex scenarios +3. **Automate**: Integrate into CI/CD pipelines +4. **Extend**: Add new test cases for specific features +5. **Optimize**: Refine test parameters and configurations + +For more information, see the main [README.md](README.md) and explore the test files in the `tests/` directory. diff --git a/autoqa/QUICK_START.md b/autoqa/QUICK_START.md new file mode 100644 index 000000000..8127114c7 --- /dev/null +++ b/autoqa/QUICK_START.md @@ -0,0 +1,213 @@ +# AutoQA Quick Start Guide + +πŸš€ Get started with AutoQA in minutes - from basic testing to migration verification. + +## Quick Start + +### 1. Install Dependencies + +```bash +# Install required packages +pip install -r requirements.txt +``` + +### 2. Basic Testing (No Migration) + +```bash +# Run all base tests +python main.py + +# Run specific test category +python main.py --tests-dir "tests/base" + +# Custom configuration +python main.py \ + --max-turns 50 +``` + +### 3. 
Migration Testing + +```bash +# Basic migration test +python main.py \ + --enable-migration-test \ + --migration-test-case "assistants" \ + --old-version "C:\path\to\old\installer.exe" \ + --new-version "C:\path\to\new\installer.exe" \ + --max-turns 65 + +# Batch mode migration test +python main.py \ + --enable-migration-test \ + --migration-test-case "assistants-complete" \ + --migration-batch-mode \ + --old-version "C:\path\to\old\installer.exe" \ + --new-version "C:\path\to\new\installer.exe" \ + --max-turns 75 +``` + +## Test Types + +### Base Test Cases +- **Default Assistant**: Verify Jan default assistant exists +- **Extensions**: Check available extensions +- **Hardware Info**: Verify hardware information display +- **Model Providers**: Check model provider availability +- **User Chat**: Test basic chat functionality +- **MCP Server**: Test experimental features + +### Migration Test Cases +- **`models`**: Test downloaded models persist after upgrade +- **`assistants`**: Test custom assistants persist after upgrade +- **`assistants-complete`**: Test both creation and chat functionality + +## Common Commands + +### Basic Workflow +```bash +# Run all tests +python main.py + +# Run with ReportPortal +python main.py --enable-reportportal --rp-token "YOUR_TOKEN" + +# Custom test directory +python main.py --tests-dir "my_tests" + +# Skip computer server auto-start +python main.py --skip-server-start +``` + +### Migration Workflow +```bash +# Test assistants migration +python main.py \ + --enable-migration-test \ + --migration-test-case "assistants" \ + --old-version "path/to/old.exe" \ + --new-version "path/to/new.exe" + +# Test models migration +python main.py \ + --enable-migration-test \ + --migration-test-case "models" \ + --old-version "path/to/old.exe" \ + --new-version "path/to/new.exe" + +# Test complete assistants migration (batch mode) +python main.py \ + --enable-migration-test \ + --migration-test-case "assistants-complete" \ + --migration-batch-mode \ + --old-version "path/to/old.exe" \ + --new-version "path/to/new.exe" +``` + +## Configuration Options + +### Essential Arguments +| Argument | Description | Default | +|----------|-------------|---------| +| `--max-turns` | Maximum turns per test | 30 | +| `--tests-dir` | Test files directory | `tests` | +| `--jan-app-path` | Jan executable path | auto-detected | +| `--model-name` | AI model name | UI-TARS-1.5-7B | + +### Migration Arguments +| Argument | Description | Required | +|----------|-------------|----------| +| `--enable-migration-test` | Enable migration mode | Yes | +| `--migration-test-case` | Test case to run | Yes | +| `--migration-batch-mode` | Use batch mode | No | +| `--old-version` | Old installer path | Yes | +| `--new-version` | New installer path | Yes | + +### ReportPortal Arguments +| Argument | Description | Required | +|----------|-------------|----------| +| `--enable-reportportal` | Enable RP integration | No | +| `--rp-token` | ReportPortal token | Yes (if RP enabled) | +| `--rp-endpoint` | RP endpoint URL | No | +| `--rp-project` | RP project name | No | + +## Environment Variables + +```bash +# Set common variables +export MAX_TURNS=65 +export MODEL_NAME="gpt-4" +export MODEL_BASE_URL="https://api.openai.com/v1" +export JAN_APP_PATH="C:\path\to\Jan.exe" + +# Use in commands +python main.py --max-turns "$MAX_TURNS" +``` + +## Examples + +### Example 1: Basic Testing +```bash +# Test core functionality +python main.py \ + --max-turns 40 \ + --tests-dir "tests/base" +``` + +### Example 2: Simple 
Migration +```bash +# Test assistants persistence +python main.py \ + --enable-migration-test \ + --migration-test-case "assistants" \ + --old-version "Jan_0.6.6.exe" \ + --new-version "Jan_0.6.7.exe" \ + --max-turns 65 +``` + +### Example 3: Advanced Migration +```bash +# Test complete functionality with ReportPortal +python main.py \ + --enable-migration-test \ + --migration-test-case "assistants-complete" \ + --migration-batch-mode \ + --old-version "Jan_0.6.6.exe" \ + --new-version "Jan_0.6.7.exe" \ + --max-turns 75 \ + --enable-reportportal \ + --rp-token "YOUR_TOKEN" \ + --rp-project "jan_migration_tests" +``` + +## Troubleshooting + +### Common Issues +1. **Path Issues**: Use absolute paths with proper escaping +2. **Turn Limits**: Increase `--max-turns` for complex tests +3. **Permissions**: Run as administrator on Windows +4. **Dependencies**: Ensure all packages are installed + +### Debug Mode +```bash +# Enable verbose logging +export LOG_LEVEL=DEBUG +export PYTHONPATH=. + +# Run with debug output +python main.py --enable-migration-test ... +``` + +## Output Files + +- **Trajectories**: `trajectories/` - Agent interaction logs +- **Recordings**: `recordings/` - Screen recordings (MP4) +- **Console**: Detailed execution logs + +## Next Steps + +1. **Start Simple**: Run basic tests first +2. **Add Migration**: Test data persistence +3. **Customize**: Adjust parameters for your needs +4. **Integrate**: Add to CI/CD pipelines + +For detailed documentation, see [MIGRATION_TESTING.md](MIGRATION_TESTING.md) and [README.md](README.md). diff --git a/autoqa/batch_migration_runner.py b/autoqa/batch_migration_runner.py new file mode 100644 index 000000000..31759c59e --- /dev/null +++ b/autoqa/batch_migration_runner.py @@ -0,0 +1,413 @@ +import asyncio +import logging +import os +import time +from datetime import datetime +from pathlib import Path +import threading + +from utils import force_close_jan, is_jan_running, start_jan_app +from migration_utils import install_jan_version, prepare_migration_environment +from test_runner import run_single_test_with_timeout +from agent import ComputerAgent, LLM +from screen_recorder import ScreenRecorder +from reportportal_handler import upload_test_results_to_rp +from utils import get_latest_trajectory_folder +from reportportal_handler import extract_test_result_from_trajectory + +logger = logging.getLogger(__name__) + +async def run_single_test_with_timeout_no_restart(computer, test_data, rp_client, launch_id, max_turns=30, + jan_app_path=None, jan_process_name="Jan.exe", agent_config=None, + enable_reportportal=False): + """ + Run a single test case WITHOUT restarting the Jan app - assumes app is already running + Returns dict with test result: {"success": bool, "status": str, "message": str} + """ + path = test_data['path'] + prompt = test_data['prompt'] + + # Detect if using nightly version based on process name + is_nightly = "nightly" in jan_process_name.lower() if jan_process_name else False + + # Default agent config if not provided + if agent_config is None: + agent_config = { + "loop": "uitars", + "model_provider": "oaicompat", + "model_name": "ByteDance-Seed/UI-TARS-1.5-7B", + "model_base_url": "http://10.200.108.58:1234/v1" + } + + # Create trajectory_dir from path (remove .txt extension) + trajectory_name = str(Path(path).with_suffix('')) + trajectory_base_dir = os.path.abspath(f"trajectories/{trajectory_name.replace(os.sep, '/')}") + + # Ensure trajectories directory exists + os.makedirs(os.path.dirname(trajectory_base_dir), 
exist_ok=True) + + # Create recordings directory + recordings_dir = "recordings" + os.makedirs(recordings_dir, exist_ok=True) + + # Create video filename + current_time = datetime.now().strftime("%Y%m%d_%H%M%S") + safe_test_name = trajectory_name.replace('/', '_').replace('\\', '_') + video_filename = f"{safe_test_name}_{current_time}.mp4" + video_path = os.path.abspath(os.path.join(recordings_dir, video_filename)) + + # Initialize screen recorder + recorder = ScreenRecorder(video_path, fps=10) + + try: + # Check if Jan app is running (don't restart) + from utils import is_jan_running + if not is_jan_running(jan_process_name): + logger.warning(f"Jan application ({jan_process_name}) is not running, but continuing anyway") + else: + # Ensure window is maximized for this test + from utils import maximize_jan_window + if maximize_jan_window(): + logger.info("Jan application window maximized for test") + else: + logger.warning("Could not maximize Jan application window for test") + + # Start screen recording + recorder.start_recording() + + # Create agent for this test using config + agent = ComputerAgent( + computer=computer, + loop=agent_config["loop"], + model=LLM( + provider=agent_config["model_provider"], + name=agent_config["model_name"], + provider_base_url=agent_config["model_base_url"] + ), + trajectory_dir=trajectory_base_dir + ) + + # Run the test with prompt + logger.info(f"Running test case: {path}") + + async for result in agent.run(prompt): + logger.info(f"Test result for {path}: {result}") + print(result) + + # Stop screen recording + recorder.stop_recording() + + # Extract test result + trajectory_folder = get_latest_trajectory_folder(path) + test_result = extract_test_result_from_trajectory(trajectory_folder) + + # Upload to ReportPortal if enabled + if enable_reportportal and rp_client and launch_id: + upload_test_results_to_rp(rp_client, launch_id, test_result, trajectory_folder) + + return test_result + + except Exception as e: + logger.error(f"Test failed with exception: {e}") + recorder.stop_recording() + return {"success": False, "status": "error", "message": str(e)} + finally: + # Stop screen recording + recorder.stop_recording() + + # Don't close Jan app - let it keep running for the next test + logger.info(f"Completed test: {path} (Jan app kept running)") + +async def run_batch_migration_test(computer, old_version_path, new_version_path, + rp_client=None, launch_id=None, max_turns=30, agent_config=None, + enable_reportportal=False, test_cases=None): + """ + Run migration test with batch approach: all setups first, then upgrade, then all verifies + + This approach is more realistic (like a real user) but less granular for debugging + """ + from individual_migration_runner import MIGRATION_TEST_CASES + + if test_cases is None: + test_cases = list(MIGRATION_TEST_CASES.keys()) + + logger.info("=" * 100) + logger.info("RUNNING BATCH MIGRATION TESTS") + logger.info("=" * 100) + logger.info(f"Test cases: {', '.join(test_cases)}") + logger.info("Approach: Setup All β†’ Upgrade β†’ Verify All") + logger.info("") + + batch_result = { + "overall_success": False, + "setup_phase_success": False, + "upgrade_success": False, + "verification_phase_success": False, + "setup_results": {}, + "verify_results": {}, + "error_message": None + } + + try: + # Prepare migration environment + env_setup = prepare_migration_environment() + logger.info(f"Migration environment prepared: {env_setup}") + + # PHASE 1: Install old version and run ALL setup tests + logger.info("=" * 80) + 
logger.info("PHASE 1: BATCH SETUP ON OLD VERSION") + logger.info("=" * 80) + + install_jan_version(old_version_path, "old") + time.sleep(15) # Extra wait time for stability + + # Force close any existing Jan processes before starting fresh + logger.info("Force closing any existing Jan processes...") + force_close_jan("Jan.exe") + force_close_jan("Jan-nightly.exe") + time.sleep(5) # Wait for processes to fully close + + # Start Jan app once for the entire setup phase + logger.info("Starting Jan application for setup phase...") + start_jan_app() + time.sleep(10) # Wait for app to be ready + + # Ensure window is maximized for testing + from utils import maximize_jan_window + if maximize_jan_window(): + logger.info("Jan application window maximized for setup phase") + else: + logger.warning("Could not maximize Jan application window for setup phase") + + setup_failures = 0 + + for i, test_case_key in enumerate(test_cases, 1): + test_case = MIGRATION_TEST_CASES[test_case_key] + logger.info(f"[{i}/{len(test_cases)}] Running setup: {test_case['name']}") + + # Support both single setup_test and multiple setup_tests + setup_files = [] + if 'setup_tests' in test_case: + setup_files = test_case['setup_tests'] + elif 'setup_test' in test_case: + setup_files = [test_case['setup_test']] + else: + logger.error(f"No setup tests defined for {test_case_key}") + batch_result["setup_results"][test_case_key] = False + setup_failures += 1 + continue + + # Run all setup files for this test case + test_case_setup_success = True + for j, setup_file in enumerate(setup_files, 1): + logger.info(f" [{j}/{len(setup_files)}] Running setup file: {setup_file}") + + # Load and run setup test + setup_test_path = f"tests/migration/{setup_file}" + if not os.path.exists(setup_test_path): + logger.error(f"Setup test file not found: {setup_test_path}") + test_case_setup_success = False + continue + + with open(setup_test_path, "r") as f: + setup_content = f.read() + + setup_test_data = { + "path": setup_file, + "prompt": setup_content + } + + # Run test without restarting Jan app (assumes Jan is already running) + setup_result = await run_single_test_with_timeout_no_restart( + computer=computer, + test_data=setup_test_data, + rp_client=rp_client, + launch_id=launch_id, + max_turns=max_turns, + jan_app_path=None, + jan_process_name="Jan.exe", + agent_config=agent_config, + enable_reportportal=enable_reportportal + ) + + success = setup_result.get("success", False) if setup_result else False + if success: + logger.info(f" βœ… Setup file {setup_file}: SUCCESS") + else: + logger.error(f" ❌ Setup file {setup_file}: FAILED") + test_case_setup_success = False + + # Small delay between setup files + time.sleep(3) + + # Record overall result for this test case + batch_result["setup_results"][test_case_key] = test_case_setup_success + + if test_case_setup_success: + logger.info(f"βœ… Setup {test_case_key}: SUCCESS (all {len(setup_files)} files completed)") + else: + logger.error(f"❌ Setup {test_case_key}: FAILED (one or more files failed)") + setup_failures += 1 + + # Small delay between setups + time.sleep(3) + + batch_result["setup_phase_success"] = setup_failures == 0 + logger.info(f"Setup phase complete: {len(test_cases) - setup_failures}/{len(test_cases)} successful") + + if setup_failures > 0: + logger.warning(f"{setup_failures} setup tests failed - continuing with upgrade anyway") + + # PHASE 2: Upgrade to new version + logger.info("=" * 80) + logger.info("PHASE 2: UPGRADING TO NEW VERSION") + logger.info("=" * 80) + + 
force_close_jan("Jan.exe") + force_close_jan("Jan-nightly.exe") + time.sleep(5) + + install_jan_version(new_version_path, "new") + batch_result["upgrade_success"] = True + time.sleep(15) # Extra wait time after upgrade + + # Force close any existing Jan processes before starting fresh + logger.info("Force closing any existing Jan processes...") + force_close_jan("Jan.exe") + force_close_jan("Jan-nightly.exe") + time.sleep(5) # Wait for processes to fully close + + # Start Jan app once for the entire verification phase + logger.info("Starting Jan application for verification phase...") + start_jan_app() + time.sleep(10) # Wait for app to be ready + + # Ensure window is maximized for testing + from utils import maximize_jan_window + if maximize_jan_window(): + logger.info("Jan application window maximized for verification phase") + else: + logger.warning("Could not maximize Jan application window for verification phase") + + # PHASE 3: Run ALL verification tests on new version + logger.info("=" * 80) + logger.info("PHASE 3: BATCH VERIFICATION ON NEW VERSION") + logger.info("=" * 80) + + verify_failures = 0 + + for i, test_case_key in enumerate(test_cases, 1): + test_case = MIGRATION_TEST_CASES[test_case_key] + logger.info(f"[{i}/{len(test_cases)}] Running verification: {test_case['name']}") + + # Skip verification if setup failed (optional - you could still try) + if not batch_result["setup_results"].get(test_case_key, False): + logger.warning(f"Skipping verification for {test_case_key} - setup failed") + batch_result["verify_results"][test_case_key] = False + verify_failures += 1 + continue + + # Support both single verify_test and multiple verify_tests + verify_files = [] + if 'verify_tests' in test_case: + verify_files = test_case['verify_tests'] + elif 'verify_test' in test_case: + verify_files = [test_case['verify_test']] + else: + logger.error(f"No verify tests defined for {test_case_key}") + batch_result["verify_results"][test_case_key] = False + verify_failures += 1 + continue + + # Run all verify files for this test case + test_case_verify_success = True + for j, verify_file in enumerate(verify_files, 1): + logger.info(f" [{j}/{len(verify_files)}] Running verify file: {verify_file}") + + # Load and run verification test + verify_test_path = f"tests/migration/{verify_file}" + if not os.path.exists(verify_test_path): + logger.error(f"Verification test file not found: {verify_test_path}") + test_case_verify_success = False + continue + + with open(verify_test_path, "r") as f: + verify_content = f.read() + + verify_test_data = { + "path": verify_file, + "prompt": verify_content + } + + # Run test without restarting Jan app (assumes Jan is already running) + verify_result = await run_single_test_with_timeout_no_restart( + computer=computer, + test_data=verify_test_data, + rp_client=rp_client, + launch_id=launch_id, + max_turns=max_turns, + jan_app_path=None, + jan_process_name="Jan.exe", + agent_config=agent_config, + enable_reportportal=enable_reportportal + ) + + success = verify_result.get("success", False) if verify_result else False + if success: + logger.info(f" βœ… Verify file {verify_file}: SUCCESS") + else: + logger.error(f" ❌ Verify file {verify_file}: FAILED") + test_case_verify_success = False + + # Small delay between verify files + time.sleep(3) + + # Record overall result for this test case + batch_result["verify_results"][test_case_key] = test_case_verify_success + + if test_case_verify_success: + logger.info(f"βœ… Verify {test_case_key}: SUCCESS (all {len(verify_files)} 
files completed)") + else: + logger.error(f"❌ Verify {test_case_key}: FAILED (one or more files failed)") + verify_failures += 1 + + # Small delay between verifications + time.sleep(3) + + batch_result["verification_phase_success"] = verify_failures == 0 + logger.info(f"Verification phase complete: {len(test_cases) - verify_failures}/{len(test_cases)} successful") + + # Overall success calculation + batch_result["overall_success"] = ( + batch_result["setup_phase_success"] and + batch_result["upgrade_success"] and + batch_result["verification_phase_success"] + ) + + # Final summary + logger.info("=" * 100) + logger.info("BATCH MIGRATION TEST SUMMARY") + logger.info("=" * 100) + logger.info(f"Overall Success: {batch_result['overall_success']}") + logger.info(f"Setup Phase: {batch_result['setup_phase_success']} ({len(test_cases) - setup_failures}/{len(test_cases)})") + logger.info(f"Upgrade Phase: {batch_result['upgrade_success']}") + logger.info(f"Verification Phase: {batch_result['verification_phase_success']} ({len(test_cases) - verify_failures}/{len(test_cases)})") + logger.info("") + logger.info("Detailed Results:") + for test_case_key in test_cases: + setup_status = "βœ…" if batch_result["setup_results"].get(test_case_key, False) else "❌" + verify_status = "βœ…" if batch_result["verify_results"].get(test_case_key, False) else "❌" + logger.info(f" {test_case_key.ljust(20)}: Setup {setup_status} | Verify {verify_status}") + + return batch_result + + except Exception as e: + logger.error(f"Batch migration test failed with exception: {e}") + batch_result["error_message"] = str(e) + return batch_result + finally: + # Cleanup + force_close_jan("Jan.exe") + force_close_jan("Jan-nightly.exe") + + diff --git a/autoqa/individual_migration_runner.py b/autoqa/individual_migration_runner.py new file mode 100644 index 000000000..e8df1b4ed --- /dev/null +++ b/autoqa/individual_migration_runner.py @@ -0,0 +1,462 @@ +import asyncio +import logging +import os +import time +from datetime import datetime +from pathlib import Path + +from utils import force_close_jan, is_jan_running +from migration_utils import install_jan_version, prepare_migration_environment +from test_runner import run_single_test_with_timeout + +logger = logging.getLogger(__name__) + +# Migration test case definitions - organized by QA checklist categories +MIGRATION_TEST_CASES = { + "models": { + "name": "Model Downloads Migration", + "setup_test": "models/setup-download-model.txt", + "verify_test": "models/verify-model-persistence.txt", + "description": "Tests that downloaded models persist after upgrade" + }, + "assistants": { + "name": "Custom Assistants Migration", + "setup_test": "assistants/setup-create-assistants.txt", + "verify_test": "assistants/verify-create-assistant-persistence.txt", + "description": "Tests that custom assistants persist after upgrade" + }, + "assistants-complete": { + "name": "Complete Assistants Migration (Create + Chat)", + "setup_tests": [ + "assistants/setup-create-assistants.txt", + "assistants/setup-chat-with-assistant.txt" + ], + "verify_tests": [ + "assistants/verify-create-assistant-persistence.txt", + "assistants/verify-chat-with-assistant-persistence.txt" + ], + "description": "Tests that custom assistants creation and chat functionality persist after upgrade (batch mode only)" + }, +} + +async def run_individual_migration_test(computer, test_case_key, old_version_path, new_version_path, + rp_client=None, launch_id=None, max_turns=30, agent_config=None, + enable_reportportal=False): + """ + Run 
a single migration test case + + Args: + computer: Computer agent instance + test_case_key: Key identifying the test case (e.g., "models", "chat-threads") + old_version_path: Path to old version installer + new_version_path: Path to new version installer + rp_client: ReportPortal client (optional) + launch_id: ReportPortal launch ID (optional) + max_turns: Maximum turns per test phase + agent_config: Agent configuration + enable_reportportal: Whether to upload to ReportPortal + """ + if test_case_key not in MIGRATION_TEST_CASES: + raise ValueError(f"Unknown test case: {test_case_key}") + + test_case = MIGRATION_TEST_CASES[test_case_key] + + logger.info("=" * 80) + logger.info(f"RUNNING MIGRATION TEST: {test_case['name'].upper()}") + logger.info("=" * 80) + logger.info(f"Description: {test_case['description']}") + logger.info(f"Setup Test: tests/migration/{test_case['setup_test']}") + logger.info(f"Verify Test: tests/migration/{test_case['verify_test']}") + logger.info("") + logger.info("Test Flow:") + logger.info(" 1. Install OLD version β†’ Run SETUP test") + logger.info(" 2. Install NEW version β†’ Run VERIFY test") + logger.info(" 3. Cleanup and prepare for next test") + logger.info("") + + migration_result = { + "test_case": test_case_key, + "test_name": test_case["name"], + "overall_success": False, + "old_version_setup": False, + "new_version_install": False, + "upgrade_verification": False, + "error_message": None + } + + try: + # Prepare migration environment + env_setup = prepare_migration_environment() + logger.info(f"Migration environment prepared: {env_setup}") + + # Phase 1: Install old version and run setup test + logger.info("PHASE 1: Installing old version and running setup test") + logger.info("-" * 60) + + install_jan_version(old_version_path, "old") + time.sleep(10) # Wait for Jan to be ready + + # Load and run setup test + setup_test_path = f"tests/migration/{test_case['setup_test']}" + if not os.path.exists(setup_test_path): + raise FileNotFoundError(f"Setup test file not found: {setup_test_path}") + + with open(setup_test_path, "r") as f: + setup_content = f.read() + + setup_test_data = { + "path": test_case['setup_test'], + "prompt": setup_content + } + + setup_result = await run_single_test_with_timeout( + computer=computer, + test_data=setup_test_data, + rp_client=rp_client, + launch_id=launch_id, + max_turns=max_turns, + jan_app_path=None, # Auto-detect + jan_process_name="Jan.exe", + agent_config=agent_config, + enable_reportportal=enable_reportportal + ) + + migration_result["old_version_setup"] = setup_result.get("success", False) if setup_result else False + logger.info(f"Setup phase result: {migration_result['old_version_setup']}") + + if not migration_result["old_version_setup"]: + migration_result["error_message"] = f"Failed to setup {test_case['name']} on old version" + return migration_result + + # Phase 2: Install new version (upgrade) + logger.info("PHASE 2: Installing new version (upgrade)") + logger.info("-" * 60) + + # Force close Jan before installing new version + force_close_jan("Jan.exe") + force_close_jan("Jan-nightly.exe") + time.sleep(5) + + # Install new version + install_jan_version(new_version_path, "new") + migration_result["new_version_install"] = True + time.sleep(10) # Wait for new version to be ready + + # Phase 3: Run verification test on new version (includes data integrity check) + logger.info("PHASE 3: Running verification test on new version") + logger.info("-" * 60) + + # Load and run verification test + verify_test_path = 
f"tests/migration/{test_case['verify_test']}" + if not os.path.exists(verify_test_path): + raise FileNotFoundError(f"Verification test file not found: {verify_test_path}") + + with open(verify_test_path, "r") as f: + verify_content = f.read() + + verify_test_data = { + "path": test_case['verify_test'], + "prompt": verify_content + } + + verify_result = await run_single_test_with_timeout( + computer=computer, + test_data=verify_test_data, + rp_client=rp_client, + launch_id=launch_id, + max_turns=max_turns, + jan_app_path=None, # Auto-detect + jan_process_name="Jan.exe", + agent_config=agent_config, + enable_reportportal=enable_reportportal + ) + + migration_result["upgrade_verification"] = verify_result.get("success", False) if verify_result else False + logger.info(f"Verification phase result: {migration_result['upgrade_verification']}") + + # Overall success check + migration_result["overall_success"] = ( + migration_result["old_version_setup"] and + migration_result["new_version_install"] and + migration_result["upgrade_verification"] + ) + + logger.info("=" * 80) + logger.info(f"MIGRATION TEST COMPLETED: {test_case['name'].upper()}") + logger.info("=" * 80) + logger.info(f"Overall Success: {migration_result['overall_success']}") + logger.info(f"Old Version Setup: {migration_result['old_version_setup']}") + logger.info(f"New Version Install: {migration_result['new_version_install']}") + logger.info(f"Upgrade Verification: {migration_result['upgrade_verification']}") + + return migration_result + + except Exception as e: + logger.error(f"Migration test {test_case['name']} failed with exception: {e}") + migration_result["error_message"] = str(e) + return migration_result + finally: + # Cleanup: Force close any remaining Jan processes + force_close_jan("Jan.exe") + force_close_jan("Jan-nightly.exe") + +async def run_assistant_batch_migration_test(computer, old_version_path, new_version_path, + rp_client=None, launch_id=None, max_turns=30, agent_config=None, + enable_reportportal=False): + """ + Run both assistant test cases in batch mode: + - Setup both assistant tests on old version + - Upgrade to new version + - Verify both assistant tests on new version + """ + assistant_test_cases = ["assistants", "assistant-chat"] + + logger.info("=" * 100) + logger.info("RUNNING ASSISTANT BATCH MIGRATION TESTS") + logger.info("=" * 100) + logger.info(f"Test cases: {', '.join(assistant_test_cases)}") + logger.info("Approach: Setup Both β†’ Upgrade β†’ Verify Both") + logger.info("") + + batch_result = { + "overall_success": False, + "setup_phase_success": False, + "upgrade_success": False, + "verification_phase_success": False, + "setup_results": {}, + "verify_results": {}, + "error_message": None + } + + try: + # Prepare migration environment + env_setup = prepare_migration_environment() + logger.info(f"Migration environment prepared: {env_setup}") + + # PHASE 1: Install old version and run BOTH setup tests + logger.info("=" * 80) + logger.info("PHASE 1: BATCH SETUP ON OLD VERSION") + logger.info("=" * 80) + + install_jan_version(old_version_path, "old") + time.sleep(15) # Extra wait time for stability + + setup_failures = 0 + + for i, test_case_key in enumerate(assistant_test_cases, 1): + test_case = MIGRATION_TEST_CASES[test_case_key] + logger.info(f"[{i}/{len(assistant_test_cases)}] Running setup: {test_case['name']}") + + # Load and run setup test + setup_test_path = f"tests/migration/{test_case['setup_test']}" + if not os.path.exists(setup_test_path): + logger.error(f"Setup test file not found: 
{setup_test_path}") + batch_result["setup_results"][test_case_key] = False + setup_failures += 1 + continue + + with open(setup_test_path, "r") as f: + setup_content = f.read() + + setup_test_data = { + "path": test_case['setup_test'], + "prompt": setup_content + } + + setup_result = await run_single_test_with_timeout( + computer=computer, + test_data=setup_test_data, + rp_client=rp_client, + launch_id=launch_id, + max_turns=max_turns, + jan_app_path=None, + jan_process_name="Jan.exe", + agent_config=agent_config, + enable_reportportal=enable_reportportal + ) + + success = setup_result.get("success", False) if setup_result else False + batch_result["setup_results"][test_case_key] = success + + if success: + logger.info(f"βœ… Setup {test_case_key}: SUCCESS") + else: + logger.error(f"❌ Setup {test_case_key}: FAILED") + setup_failures += 1 + + # Small delay between setups + time.sleep(3) + + batch_result["setup_phase_success"] = setup_failures == 0 + logger.info(f"Setup phase complete: {len(assistant_test_cases) - setup_failures}/{len(assistant_test_cases)} successful") + + # PHASE 2: Upgrade to new version + logger.info("=" * 80) + logger.info("PHASE 2: UPGRADING TO NEW VERSION") + logger.info("=" * 80) + + force_close_jan("Jan.exe") + force_close_jan("Jan-nightly.exe") + time.sleep(5) + + install_jan_version(new_version_path, "new") + batch_result["upgrade_success"] = True + time.sleep(15) # Extra wait time after upgrade + + # PHASE 3: Run BOTH verification tests on new version + logger.info("=" * 80) + logger.info("PHASE 3: BATCH VERIFICATION ON NEW VERSION") + logger.info("=" * 80) + + verify_failures = 0 + + for i, test_case_key in enumerate(assistant_test_cases, 1): + test_case = MIGRATION_TEST_CASES[test_case_key] + logger.info(f"[{i}/{len(assistant_test_cases)}] Running verification: {test_case['name']}") + + # Load and run verification test + verify_test_path = f"tests/migration/{test_case['verify_test']}" + if not os.path.exists(verify_test_path): + logger.error(f"Verification test file not found: {verify_test_path}") + batch_result["verify_results"][test_case_key] = False + verify_failures += 1 + continue + + with open(verify_test_path, "r") as f: + verify_content = f.read() + + verify_test_data = { + "path": test_case['verify_test'], + "prompt": verify_content + } + + verify_result = await run_single_test_with_timeout( + computer=computer, + test_data=verify_test_data, + rp_client=rp_client, + launch_id=launch_id, + max_turns=max_turns, + jan_app_path=None, + jan_process_name="Jan.exe", + agent_config=agent_config, + enable_reportportal=enable_reportportal + ) + + success = verify_result.get("success", False) if verify_result else False + batch_result["verify_results"][test_case_key] = success + + if success: + logger.info(f"βœ… Verify {test_case_key}: SUCCESS") + else: + logger.error(f"❌ Verify {test_case_key}: FAILED") + verify_failures += 1 + + # Small delay between verifications + time.sleep(3) + + batch_result["verification_phase_success"] = verify_failures == 0 + logger.info(f"Verification phase complete: {len(assistant_test_cases) - verify_failures}/{len(assistant_test_cases)} successful") + + # Overall success calculation + batch_result["overall_success"] = ( + batch_result["setup_phase_success"] and + batch_result["upgrade_success"] and + batch_result["verification_phase_success"] + ) + + # Final summary + logger.info("=" * 100) + logger.info("ASSISTANT BATCH MIGRATION TEST SUMMARY") + logger.info("=" * 100) + logger.info(f"Overall Success: 
{batch_result['overall_success']}") + logger.info(f"Setup Phase: {batch_result['setup_phase_success']} ({len(assistant_test_cases) - setup_failures}/{len(assistant_test_cases)})") + logger.info(f"Upgrade Phase: {batch_result['upgrade_success']}") + logger.info(f"Verification Phase: {batch_result['verification_phase_success']} ({len(assistant_test_cases) - verify_failures}/{len(assistant_test_cases)})") + logger.info("") + logger.info("Detailed Results:") + for test_case_key in assistant_test_cases: + setup_status = "βœ…" if batch_result["setup_results"].get(test_case_key, False) else "❌" + verify_status = "βœ…" if batch_result["verify_results"].get(test_case_key, False) else "❌" + logger.info(f" {test_case_key.ljust(20)}: Setup {setup_status} | Verify {verify_status}") + + return batch_result + + except Exception as e: + logger.error(f"Assistant batch migration test failed with exception: {e}") + batch_result["error_message"] = str(e) + return batch_result + finally: + # Cleanup + force_close_jan("Jan.exe") + force_close_jan("Jan-nightly.exe") + +async def run_all_migration_tests(computer, old_version_path, new_version_path, rp_client=None, + launch_id=None, max_turns=30, agent_config=None, enable_reportportal=False, + test_cases=None): + """ + Run multiple migration test cases + + Args: + test_cases: List of test case keys to run. If None, runs all test cases. + """ + if test_cases is None: + test_cases = list(MIGRATION_TEST_CASES.keys()) + + logger.info("=" * 100) + logger.info("RUNNING ALL MIGRATION TESTS") + logger.info("=" * 100) + logger.info(f"Test cases to run: {', '.join(test_cases)}") + + results = {} + overall_success = True + + for i, test_case_key in enumerate(test_cases, 1): + logger.info(f"\n[{i}/{len(test_cases)}] Starting migration test: {test_case_key}") + + result = await run_individual_migration_test( + computer=computer, + test_case_key=test_case_key, + old_version_path=old_version_path, + new_version_path=new_version_path, + rp_client=rp_client, + launch_id=launch_id, + max_turns=max_turns, + agent_config=agent_config, + enable_reportportal=enable_reportportal + ) + + results[test_case_key] = result + if not result["overall_success"]: + overall_success = False + + # Add delay between test cases + if i < len(test_cases): + logger.info("Waiting 30 seconds before next migration test...") + time.sleep(30) + + # Final summary + logger.info("=" * 100) + logger.info("MIGRATION TESTS SUMMARY") + logger.info("=" * 100) + + passed = sum(1 for r in results.values() if r["overall_success"]) + failed = len(results) - passed + + logger.info(f"Total tests: {len(results)}") + logger.info(f"Passed: {passed}") + logger.info(f"Failed: {failed}") + logger.info(f"Overall success: {overall_success}") + + for test_case_key, result in results.items(): + status = "PASS" if result["overall_success"] else "FAIL" + logger.info(f" {test_case_key}: {status}") + if result["error_message"]: + logger.info(f" Error: {result['error_message']}") + + return { + "overall_success": overall_success, + "total_tests": len(results), + "passed": passed, + "failed": failed, + "results": results + } diff --git a/autoqa/reportportal_handler.py b/autoqa/reportportal_handler.py index e05ea1ce3..d47845bee 100644 --- a/autoqa/reportportal_handler.py +++ b/autoqa/reportportal_handler.py @@ -114,46 +114,103 @@ def extract_test_result_from_trajectory(trajectory_dir): logger.info(f"Checking result in last turn: {last_turn}") - # Look for API call response files + # Look for agent response files first (preferred), then 
fall back to response files + agent_response_files = [f for f in os.listdir(last_turn_path) + if f.startswith("api_call_") and f.endswith("_agent_response.json")] response_files = [f for f in os.listdir(last_turn_path) if f.startswith("api_call_") and f.endswith("_response.json")] - if not response_files: + # Prefer agent_response files, but fall back to response files if needed + if agent_response_files: + target_files = agent_response_files + file_type = "agent_response" + elif response_files: + target_files = response_files + file_type = "response" + else: logger.warning("No API response files found in last turn") return False # Check the last response file - last_response_file = sorted(response_files)[-1] + last_response_file = sorted(target_files)[-1] response_file_path = os.path.join(last_turn_path, last_response_file) - logger.info(f"Checking response file: {last_response_file}") + logger.info(f"Checking {file_type} file: {last_response_file}") with open(response_file_path, 'r', encoding='utf-8') as f: data = json.load(f) - # Extract content from response - if 'response' in data and 'choices' in data['response'] and data['response']['choices']: - last_choice = data['response']['choices'][-1] - if 'message' in last_choice and 'content' in last_choice['message']: - content = last_choice['message']['content'] - logger.info(f"Last response content: {content}") - - # Look for result patterns - need to check both True and False - true_pattern = r'\{\s*"result"\s*:\s*True\s*\}' - false_pattern = r'\{\s*"result"\s*:\s*False\s*\}' - - true_match = re.search(true_pattern, content) - false_match = re.search(false_pattern, content) - - if true_match: - logger.info(f"Found test result: True - PASSED") - return True - elif false_match: - logger.info(f"Found test result: False - FAILED") - return False - else: - logger.warning("No valid result pattern found in response content - marking as FAILED") - return False + # Extract content from response - handle both agent_response and response formats + content = None + if file_type == "agent_response": + logger.info(f"Processing agent_response file with keys: {list(data.keys())}") + + # For agent_response.json: look in multiple possible locations + if 'response' in data and 'choices' in data['response'] and data['response']['choices']: + last_choice = data['response']['choices'][-1] + if 'message' in last_choice and 'content' in last_choice['message']: + content = last_choice['message']['content'] + logger.info(f"Found content in response.choices[].message.content: {content}") + + # Also check in output array for message content - handle both direct and nested structures + output_array = None + if 'output' in data: + output_array = data['output'] + logger.info(f"Found output array directly in data with {len(output_array)} items") + elif 'response' in data and isinstance(data['response'], dict) and 'output' in data['response']: + output_array = data['response']['output'] + logger.info(f"Found output array in nested response with {len(output_array)} items") + + if not content and output_array: + for i, output_item in enumerate(output_array): + logger.info(f"Output item {i}: type={output_item.get('type')}") + if output_item.get('type') == 'message': + message_content = output_item.get('content', []) + logger.info(f"Found message with {len(message_content)} content items") + for j, content_item in enumerate(message_content): + logger.info(f"Content item {j}: type={content_item.get('type')}, text={content_item.get('text', '')}") + if 
content_item.get('type') == 'output_text': + potential_content = content_item.get('text', '') + if 'result' in potential_content: + content = potential_content + logger.info(f"Found result content: {content}") + break + if content: + break + + if not content and not output_array: + logger.warning(f"No 'output' key found in data or nested response. Available keys: {list(data.keys())}") + if 'response' in data: + logger.warning(f"Response keys: {list(data['response'].keys()) if isinstance(data['response'], dict) else 'Not a dict'}") + else: + # For response.json: look in choices[0].message.content + if 'response' in data and 'choices' in data['response'] and data['response']['choices']: + last_choice = data['response']['choices'][-1] + if 'message' in last_choice and 'content' in last_choice['message']: + content = last_choice['message']['content'] + + if content: + logger.info(f"Last {file_type} content: {content}") + + # Look for result patterns - need to check both True and False + # Updated patterns to handle additional JSON fields and both Python and JSON boolean values + true_pattern = r'\{\s*"result"\s*:\s*(true|True)\s*[,}]' + false_pattern = r'\{\s*"result"\s*:\s*(false|False)\s*[,}]' + + true_match = re.search(true_pattern, content) + false_match = re.search(false_pattern, content) + + if true_match: + logger.info(f"Found test result: True - PASSED") + return True + elif false_match: + logger.info(f"Found test result: False - FAILED") + return False + else: + logger.warning("No valid result pattern found in response content - marking as FAILED") + return False + else: + logger.warning(f"Could not extract content from {file_type} structure") logger.warning("Could not extract content from response structure") return False diff --git a/autoqa/tests/base/default-jan-assistant.txt b/autoqa/tests/base/default-jan-assistant.txt new file mode 100644 index 000000000..2d2a357b5 --- /dev/null +++ b/autoqa/tests/base/default-jan-assistant.txt @@ -0,0 +1,19 @@ +prompt = """ +You are going to test the Jan application by verifying that a default assistant named **Jan** is present. + +Step-by-step instructions: +0. Given the Jan application is already open. +1. If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing. This ensures full visibility of the interface. +2. In the bottom-left menu, click on **Assistants**. +3. On the Assistants screen, verify that there is a visible assistant card named **Jan**. +4. Confirm that it has a description under the name that starts with: + "Jan is a helpful desktop assistant..." + +If the assistant named Jan is present and its description is visible, return: +{"result": true} + +Otherwise, return: +{"result": false} + +Only use plain ASCII characters in your response. Do NOT use Unicode symbols. +""" \ No newline at end of file diff --git a/autoqa/tests/base/enable-mcp-server.txt b/autoqa/tests/base/enable-mcp-server.txt new file mode 100644 index 000000000..edc380f24 --- /dev/null +++ b/autoqa/tests/base/enable-mcp-server.txt @@ -0,0 +1,22 @@ +prompt = """ +You are going to test the Jan application by verifying that enabling Experimental Features reveals the MCP Servers section in Settings. + +Step-by-step instructions: +0. Given the Jan application is already open. +1. If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing. This ensures full visibility of the interface. +2. 
In the bottom-left menu, click **Settings**. +3. In the left sidebar, make sure **General** is selected. +4. Scroll down to the **Advanced** section. +5. Locate the toggle labeled **Experimental Features** and switch it ON. +6. Observe the **Settings** sidebar. +7. Verify that a new section called **MCP Servers** appears. +8. Click on **MCP Servers** in the sidebar to ensure it opens and displays its content correctly. + +If the MCP Servers section appears after enabling Experimental Features and you can open it successfully, return: +{"result": true} + +Otherwise, return: +{"result": false} + +Only use plain ASCII characters in your response. Do NOT use Unicode symbols. +""" \ No newline at end of file diff --git a/autoqa/tests/base/extensions.txt b/autoqa/tests/base/extensions.txt new file mode 100644 index 000000000..bc71a4c2c --- /dev/null +++ b/autoqa/tests/base/extensions.txt @@ -0,0 +1,22 @@ +prompt = """ +You are going to test the Jan application by verifying the available extensions listed under Settings β†’ Extensions. + +Step-by-step instructions: +0. Given the Jan application is already open. +1. If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing. This ensures full visibility of the interface. +2. In the bottom-left corner, click **Settings**. +3. In the left sidebar of Settings, click on **Extensions**. +4. In the main panel, confirm that the following four extensions are listed: + - Jan Assistant + - Conversational + - Download Manager + - llama.cpp Inference Engine + +If all four extensions are present, return: +{"result": true} + +Otherwise, return: +{"result": false} + +In all responses, use only plain ASCII characters. Do NOT use Unicode symbols. +""" \ No newline at end of file diff --git a/autoqa/tests/base/hardware-info.txt b/autoqa/tests/base/hardware-info.txt new file mode 100644 index 000000000..30120359c --- /dev/null +++ b/autoqa/tests/base/hardware-info.txt @@ -0,0 +1,60 @@ +prompt = """ +You are going to test the Jan application by verifying that the hardware information is displayed correctly in the Settings panel. + +Step-by-step instructions: +0. Given the Jan application is already opened. +1. If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing. This ensures full visibility of the interface. +2. In the bottom-left menu, click on **Settings**. +3. In the left sidebar, click on **Hardware**. + +4. In the main panel, ensure the following sections are displayed clearly with appropriate system information: + +--- + +**Operating System** +- This section should display: + - A name such as "Windows", "Ubuntu", or "Macos" + - A version string like "Windows 11 Pro", "22.04.5 LTS", or "macOS 15.5 Sequoia" + +--- + +**CPU** +- This section should display: + - A processor model (e.g., Intel, AMD, or Apple Silicon) + - An architecture (e.g., x86_64, amd64, or aarch64) + - A number of cores + - Optional: An instruction set list (may appear on Linux or Windows) + - A usage bar indicating current CPU load + +--- + +**Memory** +- This section should display: + - Total RAM + - Available RAM + - A usage bar showing memory consumption + +--- + +**GPUs** +- This section is located at the bottom of the Hardware page β€” **scroll down if it is not immediately visible**. 
+- If the system has a GPU: + - It should display the GPU name (e.g., NVIDIA GeForce GTX 1080) + - A toggle should be available to enable or disable GPU usage +- If no GPU is detected: + - It should display a message like β€œNo GPUs detected” + +--- + +**Final Check** +- Ensure that there are **no error messages** in the UI. +- The layout should appear clean and correctly rendered with no broken visual elements. + +If all sections display relevant hardware information accurately and the interface is error-free, return: +{"result": true} + +Otherwise, return: +{"result": false} + +Use only plain ASCII characters in your response. Do NOT use Unicode symbols. +""" \ No newline at end of file diff --git a/autoqa/tests/base/providers-available.txt b/autoqa/tests/base/providers-available.txt new file mode 100644 index 000000000..ed5b40d22 --- /dev/null +++ b/autoqa/tests/base/providers-available.txt @@ -0,0 +1,22 @@ +prompt = """ +You are going to test the Jan application by verifying that all expected model providers are listed in the Settings panel. + +Step-by-step instructions: +0. Given the Jan application is already opened. +1. If a dialog appears in the bottom-right corner titled **"Help Us Improve Jan"**, click **Deny** to dismiss it before continuing. This ensures full visibility of the interface. +2. In the bottom-left menu, click on **Settings**. +3. In the left sidebar of Settings, click on **Model Providers**. +4. In the main panel, verify that the following model providers are listed: + - Llama.cpp + - OpenAI + - Anthropic + - Cohere + - OpenRouter + - Mistral + - Groq + - Gemini + - Hugging Face + +If all the providers are visible, return: {"result": true}. Otherwise, return: {"result": false}. +Use only plain ASCII characters in all responses. Do NOT use Unicode symbols. +""" \ No newline at end of file diff --git a/autoqa/tests/new-user/1-user-start-chatting.txt b/autoqa/tests/base/user-start-chatting.txt similarity index 99% rename from autoqa/tests/new-user/1-user-start-chatting.txt rename to autoqa/tests/base/user-start-chatting.txt index 5fc5e7f3f..e1014eea7 100644 --- a/autoqa/tests/new-user/1-user-start-chatting.txt +++ b/autoqa/tests/base/user-start-chatting.txt @@ -14,4 +14,4 @@ Step-by-step instructions: If the model responds correctly, return: {"result": True}, otherwise return: {"result": False}. In all your responses, use only plain ASCII characters. Do NOT use Unicode symbols -""" +""" \ No newline at end of file diff --git a/autoqa/tests/migration/assistants/setup-chat-with-assistant.txt b/autoqa/tests/migration/assistants/setup-chat-with-assistant.txt new file mode 100644 index 000000000..4a9bb7055 --- /dev/null +++ b/autoqa/tests/migration/assistants/setup-chat-with-assistant.txt @@ -0,0 +1,46 @@ +You are setting up a chat thread using a custom assistant in the OLD version of the Jan application. + +PHASE: SETUP CHAT THREAD (OLD VERSION) + +Step-by-step instructions: + +1. Open the Jan application (OLD version). + +2. Download the model: + - In the bottom-left corner, click **Hub**. + - Find and download the model named: `jan-nano-gguf`. + - Wait for the download to complete (the button changes to **Use**). + - Click the **Use** button to return to the Chat UI. + +3. Start a new chat using a custom assistant: + - In the main chat panel, click the assistant icon at the top (default is `Jan`). + - Select the custom assistant: `Python Tutor`. + +4. Select the model: + - Click the **Select a model** button below the chat input. 
+   - Choose: `jan-nano-gguf` under the `Llama.Cpp` section.
+
+5. Send a test message:
+   - Type: `Hello world` and press Enter or click send message (button with right arrow).
+   - Wait up to 1-2 minutes for the model to load and respond.
+
+6. Verify the model responds and return the result in the exact JSON format:
+
+CRITICAL INSTRUCTIONS FOR FINAL RESPONSE:
+- You MUST respond in English only, not any other language
+- You MUST return ONLY the JSON format below, nothing else
+- Do NOT add any explanations, thoughts, or additional text
+
+   - If the model replies appropriately, and the thread is created successfully in the left sidebar, return:
+     {"result": True, "phase": "setup_complete"}
+   - If no response is received or the chat thread is not saved, return:
+     {"result": False, "phase": "setup_failed"}
+
+IMPORTANT:
+- Your response must be ONLY the JSON above
+- Do NOT add any other text before or after the JSON
diff --git a/autoqa/tests/migration/assistants/setup-create-assistants.txt b/autoqa/tests/migration/assistants/setup-create-assistants.txt
new file mode 100644
index 000000000..9f89d7f1e
--- /dev/null
+++ b/autoqa/tests/migration/assistants/setup-create-assistants.txt
@@ -0,0 +1,53 @@
+You are testing custom assistants persistence across Jan application upgrade.
+
+PHASE 1 - SETUP (OLD VERSION):
+Step-by-step instructions for creating assistants in the OLD version:
+
+1. Open the Jan application (OLD version).
+
+2. Create the first assistant - Python Tutor:
+   - In the bottom-left corner, click **Assistants**.
+   - Click the **+** button to create a new assistant.
+   - In the **Add Assistant** modal:
+     - Select an emoji for the assistant.
+     - Set **Name**: `Python Tutor`
+     - Set **Description**: `A helpful Python programming tutor`
+     - Set **Instructions**:
+       ```
+       You are an expert Python tutor. Always explain concepts clearly with examples. Use encouraging language and provide step-by-step solutions.
+       ```
+     - Click **Save**.
+
+3. Create the second assistant - Creative Writer:
+   - Click the **+** button to create another assistant.
+   - In the **Add Assistant** modal:
+     - Select a different emoji.
+     - Set **Name**: `Creative Writer`
+     - Set **Description**: `A creative writing assistant for stories and poems`
+     - Set **Instructions**:
+       ```
+       You are a creative writing assistant. Help users write engaging stories, poems, and creative content. Be imaginative and inspiring.
+       ```
+     - Click **Save**.
+
+4. Verify both assistants appear in the list:
+   - Return to the **Assistants** section.
+   - Confirm you see both `Python Tutor` and `Creative Writer`.
+   - Confirm the names and descriptions are correctly displayed.
+
+5. 
Return the result in the exact JSON format: + +CRITICAL INSTRUCTIONS FOR FINAL RESPONSE: +- You MUST respond in English only, not any other language +- You MUST return ONLY the JSON format below, nothing else +- Do NOT add any explanations, thoughts, or additional text + + If both assistants were created successfully with the correct metadata and parameters, you MUST return exactly: + {"result": True, "phase": "setup_complete"} + + If there were any issues, you MUST return exactly: + {"result": False, "phase": "setup_failed"} + +IMPORTANT: +- Your response must be ONLY the JSON above +- Do NOT add any other text before or after the JSON diff --git a/autoqa/tests/migration/assistants/verify-chat-with-assistant-persistence.txt b/autoqa/tests/migration/assistants/verify-chat-with-assistant-persistence.txt new file mode 100644 index 000000000..8285a10ab --- /dev/null +++ b/autoqa/tests/migration/assistants/verify-chat-with-assistant-persistence.txt @@ -0,0 +1,37 @@ +You are verifying that a previously created chat thread with a custom assistant persists and functions correctly after upgrading the Jan application. + +PHASE: VERIFY CHAT THREAD (NEW VERSION) + +Step-by-step instructions: + +1. Open the Jan application (NEW version after upgrade). + +2. Verify that the previous chat thread still exists: + - Look in the **left sidebar** under the **Recents** section. + - Confirm that a thread exists with the title: `Hello world` (this is based on the initial message sent). + - Click on that thread to open it. + +3. Verify the assistant identity: + - In the opened thread, look at the top of the message from the assistant. + - Confirm it shows the assistant name: `Python Tutor`, along with the selected emoji next to it. + - Confirm that the assistant’s previous response is visible. + +4. Send a follow-up test message: + - Type: `Can you explain how for loops work in Python?` and press Enter. + - Wait for a complete response from the assistant. + +5. Verify correct behavior: + +CRITICAL INSTRUCTIONS FOR FINAL RESPONSE: +- You MUST respond in English only, not any other language +- You MUST return ONLY the JSON format below, nothing else +- Do NOT add any explanations, thoughts, or additional text + + - If the assistant responds clearly and informatively, maintaining the tutoring tone, and the thread identity (`Python Tutor`) is preserved, return: + {"result": true, "phase": "verification_complete"} + - If the thread is missing, the assistant identity is incorrect, or the assistant fails to respond, return: + {"result": false, "phase": "verification_failed"} + +IMPORTANT: +- Your response must be ONLY the JSON above +- Do NOT add any other text before or after the JSON diff --git a/autoqa/tests/migration/assistants/verify-create-assistant-persistence.txt b/autoqa/tests/migration/assistants/verify-create-assistant-persistence.txt new file mode 100644 index 000000000..95a5620fe --- /dev/null +++ b/autoqa/tests/migration/assistants/verify-create-assistant-persistence.txt @@ -0,0 +1,48 @@ +You are verifying that custom assistants persist correctly after upgrading the Jan application. + +PHASE 2 - VERIFICATION (NEW VERSION): +Step-by-step instructions for verifying assistant persistence in the NEW version: + +1. Open the Jan application (NEW version after upgrade). + +2. Verify that previously created assistants are preserved: + - In the bottom-left corner, click **Assistants**. 
+ - Confirm that you see the following assistants in the list: + - Default assistant: `Jan` + - Custom assistant: `Python Tutor` + - Custom assistant: `Creative Writer` + +3. Verify the details of each assistant: + + - Click on Edit (Pencil Icon) of `Python Tutor`: + - Confirm **Name**: `Python Tutor` + - Confirm **Description**: `A helpful Python programming tutor` + - Confirm **Instructions** contain: + ``` + You are an expert Python tutor. Always explain concepts clearly with examples. Use encouraging language and provide step-by-step solutions. + ``` + + - Click on Edit (Pencil Icon) of `Creative Writer`: + - Confirm **Name**: `Creative Writer` + - Confirm **Description**: `A creative writing assistant for stories and poems` + - Confirm **Instructions** contain: + ``` + You are a creative writing assistant. Help users write engaging stories, poems, and creative content. Be imaginative and inspiring. + ``` + +4. Return the verification result: + +CRITICAL INSTRUCTIONS FOR FINAL RESPONSE: +- You MUST respond in English only, not any other language +- You MUST return ONLY the JSON format below, nothing else +- Do NOT add any explanations, thoughts, or additional text + +If all custom assistants are preserved with correct settings and parameters, return EXACTLY: +{"result": true, "phase": "verification_complete"} + +If any assistants are missing or have incorrect settings, return EXACTLY: +{"result": false, "phase": "verification_failed"} + +IMPORTANT: +- Your response must be ONLY the JSON above +- Do NOT add any other text before or after the JSON diff --git a/autoqa/tests/migration/models/setup-download-models.txt b/autoqa/tests/migration/models/setup-download-models.txt new file mode 100644 index 000000000..6c8bb9b40 --- /dev/null +++ b/autoqa/tests/migration/models/setup-download-models.txt @@ -0,0 +1,51 @@ +prompt = """ +You are testing comprehensive model functionality persistence across Jan application upgrade. + +PHASE 1 - SETUP (OLD VERSION): +Step-by-step instructions for OLD version setup: + +1. Given the Jan application is already opened (OLD version). + +2. Download multiple models from Hub: + - Click the **Hub** menu in the bottom-left corner + - Find and download **jan-nano-gguf** model + - Wait for download to complete (shows "Use" button) + - Find and download **gemma-2-2b-instruct-gguf** model if available + - Wait for second download to complete + +3. Test downloaded models in Hub: + - Verify both models show **Use** button instead of **Download** + - Click the **Downloaded** filter toggle on the right + - Verify both models appear in the downloaded models list + - Turn off the Downloaded filter + +4. Test models in chat: + - Click **New Chat** + - Select **jan-nano-gguf** from model dropdown + - Send: "Hello, can you tell me what model you are?" + - Wait for response + + - Create another new chat + - Select the second model from dropdown + - Send: "What's your model name and capabilities?" + - Wait for response + +5. Configure model provider settings: + - Go to **Settings** > **Model Providers** + - Click on **Llama.cpp** section + - Verify downloaded models are listed in the Models section + - Check that both models show correct names + - Try enabling **Auto-Unload Old Models** option + - Try adjusting **Context Length** for one of the models + +6. 
Test model settings persistence: + - Close Jan completely + - Reopen Jan + - Go to Settings > Model Providers > Llama.cpp + - Verify the Auto-Unload setting is still enabled + - Verify model settings are preserved + +If all models download successfully, appear in Hub with "Use" status, work in chat, and settings are preserved, return: {"result": True, "phase": "setup_complete"}, otherwise return: {"result": False, "phase": "setup_failed"}. + +In all your responses, use only plain ASCII characters. Do NOT use Unicode symbols. +""" diff --git a/autoqa/tests/migration/models/verify-model-persistence.txt b/autoqa/tests/migration/models/verify-model-persistence.txt new file mode 100644 index 000000000..c5a747264 --- /dev/null +++ b/autoqa/tests/migration/models/verify-model-persistence.txt @@ -0,0 +1,39 @@ +prompt = """ +You are verifying that model downloads and settings persist after Jan application upgrade. + +PHASE 2 - VERIFICATION (NEW VERSION): +Step-by-step instructions for NEW version verification: + +1. Given the Jan application is already opened (NEW version after upgrade). + +2. Verify models in Hub: + - Click the **Hub** menu in the bottom-left corner + - Look for the models that were downloaded: **jan-nano-gguf** and others + - Verify they show **Use** button instead of **Download** button + - Click the **Downloaded** filter toggle on the right + - Verify downloaded models appear in the filtered list + - Turn off the Downloaded filter + +3. Verify models are available in chat: + - Click **New Chat** + - Click on the model dropdown + - Verify downloaded models appear in the selectable list + - Select **jan-nano-gguf** + - Send: "Are you working correctly after the app upgrade?" + - Wait for response - should work normally + +4. Verify model provider settings: + - Go to **Settings** > **Model Providers** + - Click on **Llama.cpp** section (in the left sidebar, NOT the toggle) + - Verify downloaded models are listed in the Models section with correct names + - Check that any previously configured settings are preserved + +5. Test model management features: + - In the Models list, verify you can see model details + - Test that you can still start/stop models if applicable + - Verify model functionality is intact + +If all downloaded models are preserved, show correct status in Hub, work in chat, and model provider settings are maintained, return: {"result": True, "phase": "verification_complete"}, otherwise return: {"result": False, "phase": "verification_failed"}. + +In all your responses, use only plain ASCII characters. Do NOT use Unicode symbols. +"""
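
Note (not part of the patch): the prompt files above mix Python-style booleans (`{"result": True}`) with JSON-style ones (`{"result": true}`), and sometimes append a `"phase"` field. The updated patterns in `reportportal_handler.py` are intended to accept all of these spellings. Below is a minimal standalone sketch of that matching behavior; the `TRUE_PATTERN`, `FALSE_PATTERN`, and `classify` names are illustrative only and are not part of the handler.

```python
import re

# Patterns copied from the updated extract_test_result_from_trajectory():
# they accept JSON ("true"/"false") and Python ("True"/"False") spellings,
# and tolerate extra fields such as "phase" after the boolean.
TRUE_PATTERN = r'\{\s*"result"\s*:\s*(true|True)\s*[,}]'
FALSE_PATTERN = r'\{\s*"result"\s*:\s*(false|False)\s*[,}]'

def classify(content: str):
    """Return True, False, or None depending on which result pattern matches."""
    if re.search(TRUE_PATTERN, content):
        return True
    if re.search(FALSE_PATTERN, content):
        return False
    return None  # no recognizable result; the handler treats this as FAILED

# Examples drawn from the prompt files in this patch:
print(classify('{"result": true}'))                                   # True
print(classify('{"result": True, "phase": "setup_complete"}'))        # True
print(classify('{"result": false, "phase": "verification_failed"}'))  # False
print(classify('model did not answer'))                               # None
```

Running the sketch prints True, True, False, None, which mirrors how the handler maps an unmatched response content to a failed test.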