// bandit-runner/ssh-proxy/agent.ts

/**
 * LangGraph agent integrated with SSH proxy
 * Runs in Node.js environment with full dependency support
 * Based on context7 best practices for streaming and config passing
 */

import { StateGraph, END, START, Annotation } from "@langchain/langgraph"
import { HumanMessage, SystemMessage } from "@langchain/core/messages"
import { ChatOpenAI } from "@langchain/openai"
import type { RunnableConfig } from "@langchain/core/runnables"
import type { Client } from 'ssh2'
import type { Response } from 'express'

// One executed command as recorded in the run history
type CommandRecord = {
  command: string
  output: string
  exitCode: number
  timestamp: string
  level: number
}

// One reasoning entry produced while working on a level
type ThoughtRecord = {
  type: 'plan' | 'observation' | 'reasoning' | 'decision'
  content: string
  timestamp: string
  level: number
}

// Phases a run can be in
type RunStatus =
  | 'planning'
  | 'executing'
  | 'validating'
  | 'advancing'
  | 'paused'
  | 'paused_for_user_action'
  | 'complete'
  | 'failed'

/**
 * LangGraph state schema for a Bandit run.
 *
 * Fields declared without a reducer are replaced by whatever a node returns.
 * `commandHistory` and `thoughts` append the returned entries to the existing
 * array; `totalTokens` and `totalCost` add the returned amount to the running sum.
 */
const BanditState = Annotation.Root({
  runId: Annotation<string>,
  currentLevel: Annotation<number>,
  targetLevel: Annotation<number>,
  currentPassword: Annotation<string>,
  nextPassword: Annotation<string | null>,
  levelGoal: Annotation<string>,
  commandHistory: Annotation<CommandRecord[]>({
    default: () => [],
    reducer: (existing, incoming) => existing.concat(incoming),
  }),
  thoughts: Annotation<ThoughtRecord[]>({
    default: () => [],
    reducer: (existing, incoming) => existing.concat(incoming),
  }),
  status: Annotation<RunStatus>,
  retryCount: Annotation<number>,
  maxRetries: Annotation<number>,
  sshConnectionId: Annotation<string | null>,
  error: Annotation<string | null>,
  totalTokens: Annotation<number>({
    default: () => 0,
    reducer: (runningTotal, delta) => runningTotal + delta,
  }),
  totalCost: Annotation<number>({
    default: () => 0,
    reducer: (runningTotal, delta) => runningTotal + delta,
  }),
})

// Fully-resolved state object type handed to every graph node
type BanditAgentState = typeof BanditState.State

/**
 * Goal hints handed to the LLM, keyed by the level the agent is currently
 * logged in as: entry N describes how to obtain the password for level N + 1.
 * Levels without an entry fall back to a generic goal at the call site.
 */
const LEVEL_GOALS: Record<number, string> = {
  0: "Read 'readme' file in home directory",
  1: "Read '-' file (use 'cat ./-' or 'cat < -')",
  2: "Read the file with spaces in its name in home directory (quote or escape the name)",
  3: "Find and read the hidden file in the inhere directory",
  4: "Find file in inhere directory that is human-readable",
  5: "Find file under inhere that is human-readable, 1033 bytes, not executable",
  6: "Find file anywhere on the server owned by user bandit7, group bandit6, 33 bytes",
  // Add more as needed
}

/**
 * System prompt sent as the first message of every planning request.
 * It tells the model it already has an SSH shell, lists allowed and forbidden
 * commands, and describes the plan / execute / validate / advance workflow.
 */
const SYSTEM_PROMPT = `You are BanditRunner, an autonomous operator solving the OverTheWire Bandit wargame.

CRITICAL RULES:
1. You are ALREADY connected via SSH. Do NOT run 'ssh' commands yourself.
2. Only use safe shell commands: ls, cat, grep, find, strings, file, base64, tar, gzip, etc.
3. Think step-by-step before executing commands
4. Extract passwords (32-char alphanumeric strings) from command output
5. Validate before advancing to the next level

FORBIDDEN:
- Do NOT run: ssh, scp, sudo, su, rm -rf, chmod on system files
- Do NOT attempt nested SSH connections - you already have an active shell

WORKFLOW:
1. Plan - analyze level goal and formulate command strategy
2. Execute - run a single, focused command
3. Validate - check output for password (32-char alphanumeric)
4. Advance - proceed to next level with found password`

/**
 * Runs an async operation, retrying on failure with exponentially growing pauses.
 *
 * The operation is attempted up to `maxRetries + 1` times. After the k-th failed
 * attempt (1-based) the helper waits `baseDelay * 2^(k-1)` milliseconds before
 * trying again; no wait happens after the final attempt.
 *
 * @param fn         Operation to run; a rejection triggers a retry.
 * @param maxRetries Number of retries after the first attempt.
 * @param baseDelay  Wait before the first retry, in milliseconds.
 * @param context    Label used in log lines and in the final error message.
 * @returns The value resolved by the first successful attempt.
 * @throws Error naming the context, the attempt count and the last failure message.
 */
async function retryWithBackoff<T>(
  fn: () => Promise<T>,
  maxRetries: number = 3,
  baseDelay: number = 1000,
  context: string = 'operation'
): Promise<T> {
  const totalAttempts = maxRetries + 1
  let failure: Error | null = null
  let attemptNo = 1

  while (attemptNo <= totalAttempts) {
    try {
      return await fn()
    } catch (caught) {
      // Normalise non-Error throwables so the final message is always readable
      failure = caught instanceof Error ? caught : new Error(String(caught))
    }

    if (attemptNo < totalAttempts) {
      const waitMs = baseDelay * 2 ** (attemptNo - 1) // doubles after every failure
      console.log(`${context} failed (attempt ${attemptNo}/${totalAttempts}), retrying in ${waitMs}ms...`)
      await new Promise(resolve => setTimeout(resolve, waitMs))
    }

    attemptNo++
  }

  throw new Error(`${context} failed after ${totalAttempts} attempts: ${failure?.message}`)
}

/**
 * Planning node: makes sure an SSH session exists, then asks the LLM for the next command.
 *
 * When the state has no `sshConnectionId`, this call only opens a session through
 * the SSH proxy (as `bandit<currentLevel>`) and returns status 'planning'; the LLM
 * is consulted on the following pass.
 *
 * Otherwise it sends the level goal plus the three most recent commands to the LLM
 * and records the reply as a 'plan' thought. Token and cost figures are returned as
 * per-call amounts.
 *
 * @param state  Current graph state.
 * @param config Runnable config; the chat model is read from `config.configurable.llm`
 *               and the same config is forwarded to the LLM call.
 * @returns Partial state update with status 'planning', 'executing' or 'failed'.
 */
async function planLevel(
  state: BanditAgentState,
  config?: RunnableConfig
): Promise<Partial<BanditAgentState>> {
  const { currentLevel, levelGoal, commandHistory, sshConnectionId, currentPassword } = state

  // Establish SSH connection if needed
  if (!sshConnectionId) {
    const sshProxyUrl = process.env.SSH_PROXY_URL || 'http://localhost:3001'

    try {
      const connectData = await retryWithBackoff(
        async () => {
          const connectResponse = await fetch(`${sshProxyUrl}/ssh/connect`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
              host: 'bandit.labs.overthewire.org',
              port: 2220,
              username: `bandit${currentLevel}`,
              password: currentPassword,
              testOnly: false,
            }),
          })

          const data = await connectResponse.json() as { connectionId?: string; success?: boolean; message?: string }

          // Throwing here lets retryWithBackoff treat a refused login like a transport error
          if (!data.success || !data.connectionId) {
            throw new Error(data.message || 'Connection failed')
          }

          return data
        },
        3,
        1000,
        `SSH connection to bandit${currentLevel}`
      )

      // Update state with connection ID
      return {
        sshConnectionId: connectData.connectionId,
        status: 'planning',
      }
    } catch (error) {
      return {
        status: 'failed',
        error: `SSH connection failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
      }
    }
  }

  // Get LLM from config (injected by agent). Fail immediately when it is absent:
  // retrying cannot fix a missing dependency and would only burn the backoff delays.
  const llm = config?.configurable?.llm as ChatOpenAI | undefined
  if (!llm) {
    return {
      status: 'failed',
      error: 'LLM planning failed: no LLM instance found at config.configurable.llm',
    }
  }

  // Build context from recent commands (output truncated to keep the prompt small)
  const recentCommands = commandHistory.slice(-3).map(cmd =>
    `Command: ${cmd.command}\nOutput: ${cmd.output.slice(0, 300)}\nExit: ${cmd.exitCode}`
  ).join('\n\n')

  const messages = [
    new SystemMessage(SYSTEM_PROMPT),
    new HumanMessage(`Level ${currentLevel}: ${levelGoal}

Recent Commands:
${recentCommands || 'No commands yet'}

What command should I run next? Provide ONLY the exact command to execute.`),
  ]

  // Invoke LLM with retry logic
  let thought: string
  let tokensUsed = 0
  let costIncurred = 0

  try {
    const response = await retryWithBackoff(
      async () => llm.invoke(messages, config),
      3,
      2000,
      `LLM planning for level ${currentLevel}`
    )

    // Message content is either a plain string or an array of content parts;
    // flatten the textual parts so downstream code can always treat it as a string.
    const rawContent: unknown = response.content
    if (typeof rawContent === 'string') {
      thought = rawContent
    } else if (Array.isArray(rawContent)) {
      thought = rawContent
        .map((part: unknown) => {
          if (typeof part === 'string') return part
          const text = (part as { text?: unknown } | null)?.text
          return typeof text === 'string' ? text : ''
        })
        .join('')
    } else {
      thought = ''
    }

    // Track token usage if available in response
    if (response.response_metadata?.tokenUsage) {
      tokensUsed = response.response_metadata.tokenUsage.totalTokens || 0
    } else if (response.usage_metadata) {
      tokensUsed = response.usage_metadata.total_tokens || 0
    }

    // Estimate cost based on token usage (rough estimate)
    // OpenRouter pricing varies, so this is approximate
    const estimatedPromptTokens = Math.floor(tokensUsed * 0.7)
    const estimatedCompletionTokens = Math.floor(tokensUsed * 0.3)
    // Rough average cost per million tokens: $1 for prompts, $5 for completions
    costIncurred = (estimatedPromptTokens / 1000000) * 1 + (estimatedCompletionTokens / 1000000) * 5
  } catch (error) {
    return {
      status: 'failed',
      error: `LLM planning failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
    }
  }

  return {
    thoughts: [{
      type: 'plan',
      content: thought,
      timestamp: new Date().toISOString(),
      level: currentLevel,
    }],
    totalTokens: tokensUsed,
    totalCost: costIncurred,
    status: 'executing',
  }
}

/**
 * Execution node: extracts a shell command from the latest thought and runs it
 * through the SSH proxy with a PTY.
 *
 * Extraction prefers the contents of a fenced code block (optionally tagged
 * `bash` or `sh`) and otherwise falls back to the first non-blank line.
 * Commands matching the forbidden patterns are not sent to the proxy; a
 * synthetic failed history entry is recorded instead and the graph goes back
 * to planning.
 *
 * @param state  Current graph state; reads `thoughts`, `currentLevel`, `sshConnectionId`.
 * @param config Unused; accepted so the node matches the signature of its siblings.
 * @returns Partial state update with status 'validating', 'planning' or 'failed'.
 */
async function executeCommand(
  state: BanditAgentState,
  config?: RunnableConfig
): Promise<Partial<BanditAgentState>> {
  const { thoughts, currentLevel, sshConnectionId } = state

  // Extract command from latest thought; tolerate an empty thought list or non-string content
  const latestThought = thoughts[thoughts.length - 1]
  const content = typeof latestThought?.content === 'string' ? latestThought.content : ''
  const commandMatch = content.match(/```(?:bash|sh)?\s*\n?(.+?)\n?```/s) ||
                       content.match(/^\s*(\S.*)$/m) // first non-blank line
  const command = commandMatch?.[1]?.trim()

  if (!command) {
    return {
      status: 'failed',
      error: 'Could not extract command from LLM response',
    }
  }

  // Validate command - prevent nested SSH and dangerous commands.
  // `(?:\s|$)` also catches the bare command (e.g. `su`), which would otherwise
  // sit at an interactive prompt under the PTY.
  const forbiddenPatterns = [
    /^\s*ssh(?:\s|$)/i,     // No nested SSH
    /^\s*scp(?:\s|$)/i,     // No SCP
    /^\s*sudo(?:\s|$)/i,    // No sudo
    /^\s*su(?:\s|$)/i,      // No su
    /rm\s+.*-rf/i,          // No recursive force delete
  ]

  if (forbiddenPatterns.some(pattern => pattern.test(command))) {
    return {
      commandHistory: [{
        command,
        output: `ERROR: Forbidden command pattern detected. You are already in an SSH session. Use basic shell commands only.`,
        exitCode: 1,
        timestamp: new Date().toISOString(),
        level: currentLevel,
      }],
      status: 'planning', // Go back to planning with the error context
    }
  }

  // Execute via SSH with PTY enabled with retry logic
  try {
    const sshProxyUrl = process.env.SSH_PROXY_URL || 'http://localhost:3001'

    const data = await retryWithBackoff(
      async () => {
        const response = await fetch(`${sshProxyUrl}/ssh/exec`, {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({
            connectionId: sshConnectionId,
            command,
            usePTY: true, // Enable PTY for full terminal capture
            timeout: 30000,
          }),
        })

        if (!response.ok) {
          throw new Error(`SSH exec returned ${response.status}`)
        }

        return await response.json() as { output?: string; exitCode?: number; success?: boolean }
      },
      2, // Fewer retries for command execution
      1500,
      `SSH exec: ${command.slice(0, 30)}...`
    )

    const result = {
      command,
      output: data.output ?? '',
      // `??` rather than `||`: exit code 0 is a real (successful) value and must
      // not be replaced by the "unknown" fallback of 1.
      exitCode: data.exitCode ?? 1,
      timestamp: new Date().toISOString(),
      level: currentLevel,
    }

    return {
      commandHistory: [result],
      status: 'validating',
    }
  } catch (error) {
    return {
      status: 'failed',
      error: `SSH execution failed: ${error instanceof Error ? error.message : String(error)}`,
    }
  }
}

/**
 * Validation node: looks for a password candidate in the last command's output
 * and checks it by attempting a test login for the next level.
 *
 * Outcomes:
 * - candidate accepted by the proxy            -> status 'advancing' with `nextPassword`
 * - candidate rejected, or no candidate found  -> retry planning while
 *   `retryCount < maxRetries`, otherwise 'paused_for_user_action'
 * - test login could not be performed (error)  -> proceeds as if accepted (fail-open)
 *
 * @param state  Current graph state.
 * @param config Unused; accepted so the node matches the signature of its siblings.
 * @returns Partial state update describing the next step.
 */
async function validateResult(
  state: BanditAgentState,
  config?: RunnableConfig
): Promise<Partial<BanditAgentState>> {
  const { commandHistory, currentLevel, retryCount, maxRetries } = state

  // Shared fallback: plan again while retries remain, otherwise hand over to the user.
  const retryOrPause = (
    feedback?: BanditAgentState['commandHistory'][number]
  ): Partial<BanditAgentState> => {
    if (retryCount < maxRetries) {
      const update: Partial<BanditAgentState> = {
        retryCount: retryCount + 1,
        status: 'planning',
      }
      if (feedback) {
        update.commandHistory = [feedback]
      }
      return update
    }

    return {
      status: 'paused_for_user_action',
      error: `Max retries reached for level ${currentLevel}`,
    }
  }

  // An empty history or a missing output is treated the same as "no password found"
  const lastCommand = commandHistory[commandHistory.length - 1]
  const lastOutput = typeof lastCommand?.output === 'string' ? lastCommand.output : ''

  // Simple password extraction (first run of 32 or more alphanumeric characters)
  const passwordMatch = lastOutput.match(/([A-Za-z0-9]{32,})/)
  if (!passwordMatch) {
    return retryOrPause()
  }

  const candidatePassword = passwordMatch[1]

  // Pre-advance validation: test the password with a non-interactive SSH connection
  try {
    const sshProxyUrl = process.env.SSH_PROXY_URL || 'http://localhost:3001'
    const testResponse = await fetch(`${sshProxyUrl}/ssh/connect`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        host: 'bandit.labs.overthewire.org',
        port: 2220,
        username: `bandit${currentLevel + 1}`,
        password: candidatePassword,
        testOnly: true, // Just test, don't keep connection
      }),
    })

    const testData = await testResponse.json() as { success?: boolean; message?: string }

    if (testData.success) {
      // Password is valid, proceed to advancing
      return {
        nextPassword: candidatePassword,
        status: 'advancing',
      }
    }

    // Password is invalid: count as a retry and feed the rejection back to the planner
    return retryOrPause({
      command: '[Password Validation]',
      output: `Extracted password "${candidatePassword}" failed validation: ${testData.message ?? 'no reason given'}`,
      exitCode: 1,
      timestamp: new Date().toISOString(),
      level: currentLevel,
    })
  } catch (error) {
    // If validation fails due to network error, proceed anyway (fail-open)
    console.warn('Password validation failed due to error, proceeding:', error)
    return {
      nextPassword: candidatePassword,
      status: 'advancing',
    }
  }
}

/**
 * Advance node: promotes the validated password and moves to the next level.
 *
 * When the next level is beyond `targetLevel` the run is marked 'complete'.
 * Otherwise the per-level fields are reset and the stored SSH connection id is
 * cleared, so the planning node opens a fresh session as the next level's user
 * instead of reusing the previous user's shell.
 *
 * @param state  Current graph state; `nextPassword` is expected to hold the validated password.
 * @param config Unused; accepted so the node matches the signature of its siblings.
 * @returns Partial state update with status 'complete' or 'planning'.
 */
async function advanceLevel(
  state: BanditAgentState,
  config?: RunnableConfig
): Promise<Partial<BanditAgentState>> {
  const nextLevel = state.currentLevel + 1
  const promotedPassword = state.nextPassword ?? ''

  if (nextLevel > state.targetLevel) {
    return {
      status: 'complete',
      currentLevel: nextLevel,
      currentPassword: promotedPassword,
    }
  }

  return {
    currentLevel: nextLevel,
    currentPassword: promotedPassword,
    nextPassword: null,
    levelGoal: LEVEL_GOALS[nextLevel] || 'Unknown',
    retryCount: 0,
    // Force a reconnect as bandit<nextLevel>; the planning node only connects when this is unset.
    // NOTE(review): the previous proxy session is not closed here - confirm whether the
    // proxy reaps idle sessions or exposes a disconnect call that should be used.
    sshConnectionId: null,
    status: 'planning',
  }
}

/**
 * Router used after every node: maps the run status to the next node name.
 * The four working statuses select their node; every other status
 * (complete, failed, paused, paused_for_user_action, or anything unexpected)
 * ends the graph.
 */
function shouldContinue(state: BanditAgentState): string {
  switch (state.status) {
    case 'planning':
      return 'plan_level'
    case 'executing':
      return 'execute_command'
    case 'validating':
      return 'validate_result'
    case 'advancing':
      return 'advance_level'
    default:
      return END
  }
}

/**
 * Agent executor that can run in SSH proxy.
 *
 * Builds the plan -> execute -> validate -> advance graph and, while it runs,
 * writes one JSON object per line (JSONL) to the optional HTTP response:
 * `node_update`, `usage_update`, `thinking`, `agent_message`, `tool_call`,
 * `terminal_output`, `level_complete`, `error` and a final `run_complete`.
 */
export class BanditAgent {
  private llm: ChatOpenAI
  private graph: ReturnType<typeof StateGraph.prototype.compile>
  private responseSender?: Response

  /**
   * @param config.modelName      Model identifier passed to the chat model.
   * @param config.apiKey         API key for the OpenRouter-compatible endpoint.
   * @param config.responseSender Optional response that receives the JSONL event stream.
   *
   * `runId`, `startLevel` and `endLevel` are accepted but not read by the constructor;
   * the corresponding values are supplied through the initial state given to `run`.
   */
  constructor(config: {
    runId: string
    modelName: string
    apiKey: string
    startLevel: number
    endLevel: number
    responseSender?: Response
  }) {
    this.llm = new ChatOpenAI({
      model: config.modelName,
      apiKey: config.apiKey,
      temperature: 0.7,
      configuration: {
        baseURL: 'https://openrouter.ai/api/v1',
      },
    })

    this.responseSender = config.responseSender
    this.graph = this.createGraph()
  }

  /** Wires the four nodes together; every node is followed by the status-based router. */
  private createGraph() {
    const workflow = new StateGraph(BanditState)
      .addNode('plan_level', planLevel)
      .addNode('execute_command', executeCommand)
      .addNode('validate_result', validateResult)
      .addNode('advance_level', advanceLevel)
      .addEdge(START, 'plan_level')
      .addConditionalEdges('plan_level', shouldContinue)
      .addConditionalEdges('execute_command', shouldContinue)
      .addConditionalEdges('validate_result', shouldContinue)
      .addConditionalEdges('advance_level', shouldContinue)

    return workflow.compile()
  }

  /** Writes one event to the response as a JSONL line; a no-op once the response has ended. */
  private emit(event: Record<string, unknown>) {
    if (this.responseSender && !this.responseSender.writableEnded) {
      // Send as JSONL (newline-delimited JSON)
      this.responseSender.write(JSON.stringify(event) + '\n')
    }
  }

  /**
   * Runs the graph from `initialState`, translating each node update into UI events.
   * Never rejects: failures are reported as an `error` event, and the response
   * (if any) is always ended.
   *
   * @param initialState Starting values for the graph state.
   */
  async run(initialState: Partial<BanditAgentState>): Promise<void> {
    // In "updates" mode each chunk carries only what a node returned, not the
    // reduced graph state. Track the few values the events need ourselves:
    // scalars are overwritten, usage figures are summed like the state reducers do.
    let status: BanditAgentState['status'] | undefined
    let lastError: string | null | undefined
    let level = initialState.currentLevel ?? 0
    let totalTokens = initialState.totalTokens ?? 0
    let totalCost = initialState.totalCost ?? 0

    try {
      // Stream updates using context7 recommended pattern
      // NOTE(review): LangGraph enforces a recursion limit on graph runs; a long
      // multi-level run may need `recursionLimit` raised in this config - confirm.
      const stream = await this.graph.stream(
        initialState,
        {
          streamMode: "updates", // Per context7: emit after each step
          configurable: { llm: this.llm }, // Pass LLM through config
        }
      )

      for await (const update of stream) {
        // A chunk maps node name -> that node's output; handle every entry, not just the first
        for (const [nodeName, rawOutput] of Object.entries(update)) {
          const nodeOutput = rawOutput as Partial<BanditAgentState> | null | undefined

          this.emit({
            type: 'node_update',
            node: nodeName,
            data: nodeOutput,
            timestamp: new Date().toISOString(),
          })

          // Nothing further to report for a node that returned no update
          if (!nodeOutput) continue

          if (nodeOutput.status !== undefined) status = nodeOutput.status
          if (nodeOutput.error !== undefined) lastError = nodeOutput.error
          if (typeof nodeOutput.currentLevel === 'number') level = nodeOutput.currentLevel
          totalTokens += nodeOutput.totalTokens ?? 0
          totalCost += nodeOutput.totalCost ?? 0

          // Emit token usage updates (cumulative totals for the run)
          if (nodeOutput.totalTokens || nodeOutput.totalCost) {
            this.emit({
              type: 'usage_update',
              data: {
                totalTokens,
                totalCost,
              },
              timestamp: new Date().toISOString(),
            })
          }

          // Send specific event types based on node
          if (nodeName === 'plan_level' && nodeOutput.thoughts) {
            const thought = nodeOutput.thoughts[nodeOutput.thoughts.length - 1]

            if (thought) {
              // Emit as 'thinking' event for UI
              this.emit({
                type: 'thinking',
                data: {
                  content: thought.content,
                  level: thought.level,
                },
                timestamp: new Date().toISOString(),
              })

              // Also emit as 'agent_message' for chat panel
              this.emit({
                type: 'agent_message',
                data: {
                  content: `Planning: ${thought.content}`,
                  level: thought.level,
                  metadata: {
                    thoughtType: thought.type,
                  },
                },
                timestamp: new Date().toISOString(),
              })
            }
          }

          if (nodeName === 'execute_command' && nodeOutput.commandHistory) {
            const cmd = nodeOutput.commandHistory[nodeOutput.commandHistory.length - 1]

            if (cmd) {
              // Emit tool call event
              this.emit({
                type: 'tool_call',
                data: {
                  content: `ssh_exec: ${cmd.command}`,
                  level: cmd.level,
                  metadata: {
                    tool: 'ssh_exec',
                    command: cmd.command,
                  },
                },
                timestamp: new Date().toISOString(),
              })

              // Emit terminal output with prompt
              this.emit({
                type: 'terminal_output',
                data: {
                  content: `$ ${cmd.command}`,
                  command: cmd.command,
                  level: cmd.level,
                },
                timestamp: new Date().toISOString(),
              })

              // Emit command result (includes ANSI codes from PTY)
              this.emit({
                type: 'terminal_output',
                data: {
                  content: cmd.output,
                  level: cmd.level,
                },
                timestamp: new Date().toISOString(),
              })
            }
          }

          if (nodeName === 'validate_result' && nodeOutput.nextPassword) {
            this.emit({
              type: 'agent_message',
              data: {
                content: `Password found: ${nodeOutput.nextPassword}`,
                // The validation node does not return a level, so use the tracked one
                level,
              },
              timestamp: new Date().toISOString(),
            })
          }

          if (nodeName === 'advance_level' && nodeOutput.currentLevel !== undefined) {
            this.emit({
              type: 'level_complete',
              data: {
                content: `Level ${nodeOutput.currentLevel - 1} completed successfully`,
                level: nodeOutput.currentLevel - 1,
              },
              timestamp: new Date().toISOString(),
            })

            this.emit({
              type: 'agent_message',
              data: {
                content: `Advancing to Level ${nodeOutput.currentLevel}`,
                level: nodeOutput.currentLevel,
              },
              timestamp: new Date().toISOString(),
            })
          }

          if (nodeOutput.error) {
            this.emit({
              type: 'error',
              data: {
                content: nodeOutput.error,
                // Failing nodes do not return a level, so use the tracked one
                level,
              },
              timestamp: new Date().toISOString(),
            })
          }
        }
      }

      // Final completion event with status based on the last status any node reported
      const finalStatus = status || 'complete'
      let message = 'Agent run completed'

      if (finalStatus === 'failed') {
        message = lastError || 'Run failed'
      } else if (finalStatus === 'complete') {
        message = `Successfully completed level ${level}`
      } else {
        message = `Run ended with status: ${finalStatus}`
      }

      this.emit({
        type: 'run_complete',
        data: {
          content: message,
          status: finalStatus === 'complete' ? 'success' : 'failed',
          level,
        },
        timestamp: new Date().toISOString(),
      })
    } catch (error) {
      this.emit({
        type: 'error',
        data: { content: error instanceof Error ? error.message : String(error) },
        timestamp: new Date().toISOString(),
      })
    } finally {
      if (this.responseSender && !this.responseSender.writableEnded) {
        this.responseSender.end()
      }
    }
  }
}