2025-10-13 10:21:50 -06:00

692 lines
21 KiB
TypeScript

/**
* LangGraph agent integrated with SSH proxy
* Runs in Node.js environment with full dependency support
* Based on context7 best practices for streaming and config passing
*/
import { StateGraph, END, START, Annotation } from "@langchain/langgraph"
import { HumanMessage, SystemMessage } from "@langchain/core/messages"
import { ChatOpenAI } from "@langchain/openai"
import type { RunnableConfig } from "@langchain/core/runnables"
import type { Client } from 'ssh2'
import type { Response } from 'express'
// Shared graph state for the Bandit agent, declared with LangGraph's Annotation API.
// Fields given a `reducer` are merged with the previous value on every node update;
// the remaining fields are declared without a custom reducer or default.
const BanditState = Annotation.Root({
runId: Annotation<string>,
// Level currently being solved; planLevel logs in as `bandit${currentLevel}`.
currentLevel: Annotation<number>,
// advanceLevel marks the run 'complete' once currentLevel + 1 exceeds this value.
targetLevel: Annotation<number>,
// Password sent by planLevel when it opens the SSH connection for currentLevel.
currentPassword: Annotation<string>,
// Candidate password for the next level, set by validateResult and consumed by advanceLevel.
nextPassword: Annotation<string | null>,
// Text inserted into the LLM prompt by planLevel.
levelGoal: Annotation<string>,
// Append-only log of executed commands: each update is concatenated onto the existing list.
commandHistory: Annotation<Array<{
command: string
output: string
exitCode: number
timestamp: string
level: number
}>>({
reducer: (left, right) => left.concat(right),
default: () => [],
}),
// Append-only log of LLM thoughts: each update is concatenated onto the existing list.
thoughts: Annotation<Array<{
type: 'plan' | 'observation' | 'reasoning' | 'decision'
content: string
timestamp: string
level: number
}>>({
reducer: (left, right) => left.concat(right),
default: () => [],
}),
// Drives routing in shouldContinue: 'planning' | 'executing' | 'validating' | 'advancing'
// select the next node; every other value ends the graph.
status: Annotation<'planning' | 'executing' | 'validating' | 'advancing' | 'paused' | 'paused_for_user_action' | 'complete' | 'failed'>,
// validateResult increments retryCount until it reaches maxRetries, then pauses the run.
retryCount: Annotation<number>,
maxRetries: Annotation<number>,
// Connection handle returned by the SSH proxy; a falsy value makes planLevel connect first.
sshConnectionId: Annotation<string | null>,
error: Annotation<string | null>,
// Running sum: nodes return the tokens used by one step and the reducer adds them up.
totalTokens: Annotation<number>({
reducer: (left, right) => left + right,
default: () => 0,
}),
// Running sum of the estimated cost, accumulated the same way as totalTokens.
totalCost: Annotation<number>({
reducer: (left, right) => left + right,
default: () => 0,
}),
})
// Concrete state object type that node functions receive and partially return.
type BanditAgentState = typeof BanditState.State
/**
 * Short goal description per Bandit level, keyed by the level the agent is
 * currently logged in as (entry N describes how to obtain the password for
 * level N + 1). The text is inserted verbatim into the planning prompt, so it
 * must match the published level descriptions.
 */
const LEVEL_GOALS: Record<number, string> = {
  0: "Read 'readme' file in home directory",
  1: "Read '-' file (use 'cat ./-' or 'cat < -')",
  2: "Read the file in the home directory whose name contains spaces (quote the name; prefix it with ./ if it starts with '-')",
  3: "Find and read the hidden file in the inhere directory (use 'ls -la')",
  4: "Find file in inhere directory that is human-readable",
  5: "Find file under inhere that is human-readable, 1033 bytes in size and not executable",
  6: "Find file somewhere on the server owned by user bandit7, group bandit6, 33 bytes in size",
  // Add more as needed
}
/**
 * System message sent to the LLM on every planning call (see planLevel).
 * It tells the model that an SSH session already exists, lists the commands
 * it must not run, and describes the plan / execute / validate / advance loop.
 * The text is runtime data: any edit changes what the model is told.
 */
const SYSTEM_PROMPT = `You are BanditRunner, an autonomous operator solving the OverTheWire Bandit wargame.
CRITICAL RULES:
1. You are ALREADY connected via SSH. Do NOT run 'ssh' commands yourself.
2. Only use safe shell commands: ls, cat, grep, find, strings, file, base64, tar, gzip, etc.
3. Think step-by-step before executing commands
4. Extract passwords (32-char alphanumeric strings) from command output
5. Validate before advancing to the next level
FORBIDDEN:
- Do NOT run: ssh, scp, sudo, su, rm -rf, chmod on system files
- Do NOT attempt nested SSH connections - you already have an active shell
WORKFLOW:
1. Plan - analyze level goal and formulate command strategy
2. Execute - run a single, focused command
3. Validate - check output for password (32-char alphanumeric)
4. Advance - proceed to next level with found password`
/**
 * Runs an async operation, retrying it with exponentially growing pauses.
 *
 * The operation is attempted up to `maxRetries + 1` times. After the k-th
 * failed attempt (0-based) the helper logs a message and waits
 * `baseDelay * 2^k` milliseconds before trying again; no wait follows the
 * final attempt.
 *
 * @param fn Operation to run; a rejection or throw counts as a failed attempt.
 * @param maxRetries Number of retries allowed after the first attempt.
 * @param baseDelay Pause in milliseconds before the first retry.
 * @param context Label used in the log line and in the final error message.
 * @returns The value produced by the first successful attempt.
 * @throws Error carrying the message of the last failure once every attempt has failed.
 */
async function retryWithBackoff<T>(
  fn: () => Promise<T>,
  maxRetries: number = 3,
  baseDelay: number = 1000,
  context: string = 'operation'
): Promise<T> {
  let failure: Error | undefined
  let attemptIndex = 0

  while (attemptIndex <= maxRetries) {
    try {
      return await fn()
    } catch (caught) {
      // Normalise non-Error throwables so the final message can always be built.
      failure = caught instanceof Error ? caught : new Error(String(caught))
    }

    const retriesRemain = attemptIndex < maxRetries
    if (retriesRemain) {
      const waitMs = baseDelay * 2 ** attemptIndex // Doubles after every failure
      console.log(`${context} failed (attempt ${attemptIndex + 1}/${maxRetries + 1}), retrying in ${waitMs}ms...`)
      await new Promise(resolve => setTimeout(resolve, waitMs))
    }

    attemptIndex += 1
  }

  throw new Error(`${context} failed after ${maxRetries + 1} attempts: ${failure?.message}`)
}
/**
 * Flattens LangChain message content into plain text.
 * Content may be a plain string or an array of parts; only string parts and
 * parts exposing a string `text` property contribute to the result.
 */
function messageContentToText(content: unknown): string {
  if (typeof content === 'string') return content
  if (!Array.isArray(content)) return content == null ? '' : String(content)
  return content
    .map((part: unknown) => {
      if (typeof part === 'string') return part
      if (part !== null && typeof part === 'object' && 'text' in part) {
        const text = (part as { text?: unknown }).text
        return typeof text === 'string' ? text : ''
      }
      return ''
    })
    .join('')
}

/**
 * Planning node - opens the SSH session if needed, otherwise asks the LLM for the next command.
 *
 * Behaviour:
 * - When `state.sshConnectionId` is falsy, the node only connects through the SSH proxy
 *   (as `bandit${currentLevel}` with `currentPassword`) and returns with status 'planning',
 *   so the graph routes back here for the actual planning step.
 * - Otherwise it sends the level goal plus the last three commands to the LLM and stores the
 *   reply as a 'plan' thought, together with token usage and a rough cost estimate.
 *
 * @param state Current graph state.
 * @param config Runnable config; the LLM is read from `config.configurable.llm` and the
 *   config is forwarded to `llm.invoke` so streaming callbacks propagate.
 * @returns Partial state update. Failures are reported as `{ status: 'failed', error }`
 *   rather than thrown.
 */
async function planLevel(
  state: BanditAgentState,
  config?: RunnableConfig
): Promise<Partial<BanditAgentState>> {
  const { currentLevel, levelGoal, commandHistory, sshConnectionId, currentPassword } = state

  // Establish SSH connection if needed
  if (!sshConnectionId) {
    const sshProxyUrl = process.env.SSH_PROXY_URL || 'http://localhost:3001'
    try {
      const connectData = await retryWithBackoff(
        async () => {
          const connectResponse = await fetch(`${sshProxyUrl}/ssh/connect`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
              host: 'bandit.labs.overthewire.org',
              port: 2220,
              username: `bandit${currentLevel}`,
              password: currentPassword,
              testOnly: false,
            }),
          })
          const data = await connectResponse.json() as { connectionId?: string; success?: boolean; message?: string }
          // Throwing here makes retryWithBackoff try the connection again.
          if (!data.success || !data.connectionId) {
            throw new Error(data.message || 'Connection failed')
          }
          return data
        },
        3,
        1000,
        `SSH connection to bandit${currentLevel}`
      )
      // Update state with connection ID; status 'planning' routes back to this node.
      return {
        sshConnectionId: connectData.connectionId,
        status: 'planning',
      }
    } catch (error) {
      return {
        status: 'failed',
        error: `SSH connection failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
      }
    }
  }

  // Get LLM from config (injected by agent). Fail fast when it is missing: retrying
  // cannot fix a configuration error and would only add backoff delay.
  const llm = config?.configurable?.llm as ChatOpenAI | undefined
  if (!llm || typeof llm.invoke !== 'function') {
    return {
      status: 'failed',
      error: 'LLM planning failed: no LLM instance provided in config.configurable.llm',
    }
  }

  // Build context from recent commands (last three, output truncated to 300 chars each)
  const recentCommands = commandHistory.slice(-3).map(cmd =>
    `Command: ${cmd.command}\nOutput: ${cmd.output.slice(0, 300)}\nExit: ${cmd.exitCode}`
  ).join('\n\n')

  const messages = [
    new SystemMessage(SYSTEM_PROMPT),
    new HumanMessage(`Level ${currentLevel}: ${levelGoal}
Recent Commands:
${recentCommands || 'No commands yet'}
What command should I run next? Provide ONLY the exact command to execute.`),
  ]

  // Invoke LLM with retry logic
  let thought: string
  let tokensUsed = 0
  let costIncurred = 0
  try {
    const response = await retryWithBackoff(
      async () => llm.invoke(messages, config),
      3,
      2000,
      `LLM planning for level ${currentLevel}`
    )
    // Content can be an array of parts; downstream code calls string methods on it.
    thought = messageContentToText(response.content)

    // Track token usage if available in response
    if (response.response_metadata?.tokenUsage) {
      tokensUsed = response.response_metadata.tokenUsage.totalTokens || 0
    } else if (response.usage_metadata) {
      tokensUsed = response.usage_metadata.total_tokens || 0
    }

    // Estimate cost based on token usage (rough estimate)
    // OpenRouter pricing varies, so this is approximate
    const estimatedPromptTokens = Math.floor(tokensUsed * 0.7)
    const estimatedCompletionTokens = Math.floor(tokensUsed * 0.3)
    // Rough average cost per million tokens: $1 for prompts, $5 for completions
    costIncurred = (estimatedPromptTokens / 1000000) * 1 + (estimatedCompletionTokens / 1000000) * 5
  } catch (error) {
    return {
      status: 'failed',
      error: `LLM planning failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
    }
  }

  return {
    thoughts: [{
      type: 'plan',
      content: thought,
      timestamp: new Date().toISOString(),
      level: currentLevel,
    }],
    // These two are per-step deltas; the state reducers add them to the running totals.
    totalTokens: tokensUsed,
    totalCost: costIncurred,
    status: 'executing',
  }
}
/**
 * Execution node - extracts the command from the latest thought and runs it via the SSH proxy with PTY.
 *
 * The command is taken from the first fenced code block in the thought, or from its first
 * non-empty line when there is no fence. Commands matching the forbidden list are not sent
 * to the proxy; a synthetic history entry with an error message is recorded instead and the
 * graph returns to planning.
 *
 * @param state Current graph state; reads `thoughts`, `currentLevel` and `sshConnectionId`.
 * @param config Runnable config (unused, kept for the node signature).
 * @returns Partial state update: a new `commandHistory` entry with status 'validating' on
 *   success, status 'planning' for a rejected command, or `{ status: 'failed', error }`.
 */
async function executeCommand(
  state: BanditAgentState,
  config?: RunnableConfig
): Promise<Partial<BanditAgentState>> {
  const { thoughts, currentLevel, sshConnectionId } = state

  // Extract command from latest thought; there may be none, or its content may not be text.
  const latestThought = thoughts[thoughts.length - 1]
  const thoughtText = typeof latestThought?.content === 'string' ? latestThought.content : ''
  const commandMatch = thoughtText.match(/```(?:bash|sh)?\s*\n?(.+?)\n?```/s) ||
    thoughtText.match(/^(.+)$/m)
  const command = commandMatch?.[1]?.trim()
  if (!command) {
    return {
      status: 'failed',
      error: 'Could not extract command from LLM response',
    }
  }

  // Validate command - prevent nested SSH and dangerous commands.
  // The prefix group also matches after ; & | ` ( and newlines so that chained
  // commands such as "ls; ssh host" are rejected, not only a leading one.
  const forbiddenPatterns = [
    /(?:^|[;&|`(\n])\s*ssh\s+/i, // No nested SSH
    /(?:^|[;&|`(\n])\s*scp\s+/i, // No SCP
    /(?:^|[;&|`(\n])\s*sudo\s+/i, // No sudo
    /(?:^|[;&|`(\n])\s*su\s+/i, // No su
    /rm\s+.*-rf/i, // No recursive force delete
  ]
  if (forbiddenPatterns.some(pattern => pattern.test(command))) {
    return {
      commandHistory: [{
        command,
        output: `ERROR: Forbidden command pattern detected. You are already in an SSH session. Use basic shell commands only.`,
        exitCode: 1,
        timestamp: new Date().toISOString(),
        level: currentLevel,
      }],
      status: 'planning', // Go back to planning with the error context
    }
  }

  // Execute via SSH with PTY enabled with retry logic
  try {
    const sshProxyUrl = process.env.SSH_PROXY_URL || 'http://localhost:3001'
    const data = await retryWithBackoff(
      async () => {
        const response = await fetch(`${sshProxyUrl}/ssh/exec`, {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({
            connectionId: sshConnectionId,
            command,
            usePTY: true, // Enable PTY for full terminal capture
            timeout: 30000,
          }),
        })
        if (!response.ok) {
          throw new Error(`SSH exec returned ${response.status}`)
        }
        return await response.json() as { output?: string; exitCode?: number; success?: boolean }
      },
      2, // Fewer retries for command execution
      1500,
      `SSH exec: ${command.slice(0, 30)}...`
    )

    return {
      commandHistory: [{
        command,
        output: data.output ?? '',
        // `??` keeps a genuine exit code of 0; only a missing code is reported as 1.
        exitCode: data.exitCode ?? 1,
        timestamp: new Date().toISOString(),
        level: currentLevel,
      }],
      status: 'validating',
    }
  } catch (error) {
    return {
      status: 'failed',
      error: `SSH execution failed: ${error instanceof Error ? error.message : String(error)}`,
    }
  }
}
/**
 * Validation node - looks for a password in the last command's output and tests it.
 *
 * A candidate is a run of exactly 32 alphanumeric characters that is not part of a longer
 * alphanumeric run. Terminal escape sequences are stripped first, because PTY output can
 * place them directly next to the password. The candidate is tested by asking the SSH
 * proxy for a `testOnly` login as `bandit${currentLevel + 1}`.
 *
 * @param state Current graph state; reads `commandHistory`, `currentLevel`, `retryCount`
 *   and `maxRetries`.
 * @param config Runnable config (unused, kept for the node signature).
 * @returns Partial state update:
 *   - `{ nextPassword, status: 'advancing' }` when the candidate is accepted, or when the
 *     test request itself errors (deliberate fail-open);
 *   - `{ retryCount + 1, status: 'planning' }` when nothing usable was found and retries remain;
 *   - `{ status: 'paused_for_user_action', error }` once `maxRetries` is reached.
 */
async function validateResult(
  state: BanditAgentState,
  config?: RunnableConfig
): Promise<Partial<BanditAgentState>> {
  const { commandHistory, currentLevel, retryCount, maxRetries } = state

  // Shared outcome for "no valid password this round": retry while allowed, otherwise pause.
  const retryOrPause = (
    note?: BanditAgentState['commandHistory'][number]
  ): Partial<BanditAgentState> => {
    if (retryCount < maxRetries) {
      return {
        retryCount: retryCount + 1,
        status: 'planning',
        ...(note ? { commandHistory: [note] } : {}),
      }
    }
    return {
      status: 'paused_for_user_action',
      error: `Max retries reached for level ${currentLevel}`,
    }
  }

  // The history may be empty; treat that the same as "no password in output".
  const lastCommand = commandHistory[commandHistory.length - 1]
  const cleanOutput = (lastCommand?.output ?? '')
    .replace(/\u001b\][^\u0007\u001b]*(?:\u0007|\u001b\\)/g, '') // OSC sequences (e.g. window title)
    .replace(/\u001b\[[0-?]*[ -/]*[@-~]/g, '') // CSI sequences (colours, cursor movement)

  // Exactly 32 alphanumeric characters, not embedded in a longer alphanumeric run
  const passwordMatch = cleanOutput.match(/(?<![A-Za-z0-9])[A-Za-z0-9]{32}(?![A-Za-z0-9])/)
  if (!passwordMatch) {
    // No password found, retry if under limit
    return retryOrPause()
  }
  const candidatePassword = passwordMatch[0]

  // Pre-advance validation: test the password with a non-interactive SSH connection
  try {
    const sshProxyUrl = process.env.SSH_PROXY_URL || 'http://localhost:3001'
    const testResponse = await fetch(`${sshProxyUrl}/ssh/connect`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        host: 'bandit.labs.overthewire.org',
        port: 2220,
        username: `bandit${currentLevel + 1}`,
        password: candidatePassword,
        testOnly: true, // Just test, don't keep connection
      }),
    })
    const testData = await testResponse.json() as { success?: boolean; message?: string }

    if (testData.success) {
      // Password is valid, proceed to advancing
      return {
        nextPassword: candidatePassword,
        status: 'advancing',
      }
    }

    // Password is invalid, count as retry and tell the planner why
    return retryOrPause({
      command: '[Password Validation]',
      output: `Extracted password "${candidatePassword}" failed validation: ${testData.message ?? 'unknown reason'}`,
      exitCode: 1,
      timestamp: new Date().toISOString(),
      level: currentLevel,
    })
  } catch (error) {
    // If validation fails due to network error, proceed anyway (fail-open)
    console.warn('Password validation failed due to error, proceeding:', error)
    return {
      nextPassword: candidatePassword,
      status: 'advancing',
    }
  }
}
/**
 * Advance node - moves the run to the next level, or marks it complete.
 *
 * When the next level is beyond `targetLevel` the run is marked 'complete'. Otherwise the
 * per-level fields are reset and `sshConnectionId` is cleared, so that planLevel opens a
 * fresh connection as the next level's user instead of reusing the previous session.
 * NOTE(review): the previous proxy connection is not explicitly closed here; confirm
 * whether the SSH proxy needs a disconnect call.
 *
 * @param state Current graph state; reads `currentLevel`, `targetLevel` and `nextPassword`.
 * @param config Runnable config (unused, kept for the node signature).
 * @returns Partial state update for the next level or for completion.
 */
async function advanceLevel(
  state: BanditAgentState,
  config?: RunnableConfig
): Promise<Partial<BanditAgentState>> {
  const nextLevel = state.currentLevel + 1
  const nextLevelPassword = state.nextPassword ?? ''

  if (nextLevel > state.targetLevel) {
    return {
      status: 'complete',
      currentLevel: nextLevel,
      currentPassword: nextLevelPassword,
    }
  }

  return {
    currentLevel: nextLevel,
    currentPassword: nextLevelPassword,
    nextPassword: null,
    levelGoal: LEVEL_GOALS[nextLevel] || 'Unknown',
    retryCount: 0,
    // The existing session is logged in as the previous level's user; drop the handle
    // so the planning node reconnects with the new credentials.
    sshConnectionId: null,
    status: 'planning',
  }
}
/**
 * Router used after every node: maps the state's status to the next node name.
 * The four working statuses select a node; every other status (complete, failed,
 * paused, paused_for_user_action, or anything unexpected) ends the graph.
 */
function shouldContinue(state: BanditAgentState): string {
  switch (state.status) {
    case 'planning':
      return 'plan_level'
    case 'executing':
      return 'execute_command'
    case 'validating':
      return 'validate_result'
    case 'advancing':
      return 'advance_level'
    default:
      // Terminal and paused statuses all stop the run.
      return END
  }
}
/**
 * Agent executor that can run in SSH proxy.
 *
 * Builds the plan / execute / validate / advance graph, runs it, and streams progress to an
 * optional Express response as JSONL (one JSON object per line).
 */
export class BanditAgent {
  private llm: ChatOpenAI
  private graph: ReturnType<typeof StateGraph.prototype.compile>
  private responseSender?: Response

  /**
   * @param config.modelName Model identifier passed to ChatOpenAI.
   * @param config.apiKey API key passed to ChatOpenAI (base URL is OpenRouter).
   * @param config.responseSender Optional response that receives the JSONL event stream.
   *   `runId`, `startLevel` and `endLevel` are accepted but not read by this class.
   */
  constructor(config: {
    runId: string
    modelName: string
    apiKey: string
    startLevel: number
    endLevel: number
    responseSender?: Response
  }) {
    this.llm = new ChatOpenAI({
      model: config.modelName,
      apiKey: config.apiKey,
      temperature: 0.7,
      configuration: {
        baseURL: 'https://openrouter.ai/api/v1',
      },
    })
    this.responseSender = config.responseSender
    this.graph = this.createGraph()
  }

  /** Wires the four nodes together; every node is followed by the shouldContinue router. */
  private createGraph() {
    const workflow = new StateGraph(BanditState)
      .addNode('plan_level', planLevel)
      .addNode('execute_command', executeCommand)
      .addNode('validate_result', validateResult)
      .addNode('advance_level', advanceLevel)
      .addEdge(START, 'plan_level')
      .addConditionalEdges('plan_level', shouldContinue)
      .addConditionalEdges('execute_command', shouldContinue)
      .addConditionalEdges('validate_result', shouldContinue)
      .addConditionalEdges('advance_level', shouldContinue)
    return workflow.compile()
  }

  /** Writes one event as a JSONL line; a no-op without a response or after it has ended. */
  private emit(event: Record<string, unknown>) {
    if (this.responseSender && !this.responseSender.writableEnded) {
      // Send as JSONL (newline-delimited JSON)
      this.responseSender.write(JSON.stringify(event) + '\n')
    }
  }

  /**
   * Emits the UI-facing events derived from a single node's output.
   *
   * @param nodeName Name of the graph node that produced the output.
   * @param nodeOutput Partial state returned by that node.
   * @param currentLevel Level tracked by run(); used when the output carries no level of its own.
   */
  private emitNodeEvents(
    nodeName: string,
    nodeOutput: Partial<BanditAgentState>,
    currentLevel: number
  ) {
    if (nodeName === 'plan_level' && nodeOutput.thoughts) {
      const thought = nodeOutput.thoughts[nodeOutput.thoughts.length - 1]
      if (thought) {
        // Emit as 'thinking' event for UI
        this.emit({
          type: 'thinking',
          data: {
            content: thought.content,
            level: thought.level,
          },
          timestamp: new Date().toISOString(),
        })
        // Also emit as 'agent_message' for chat panel
        this.emit({
          type: 'agent_message',
          data: {
            content: `Planning: ${thought.content}`,
            level: thought.level,
            metadata: {
              thoughtType: thought.type,
            },
          },
          timestamp: new Date().toISOString(),
        })
      }
    }

    if (nodeName === 'execute_command' && nodeOutput.commandHistory) {
      const cmd = nodeOutput.commandHistory[nodeOutput.commandHistory.length - 1]
      if (cmd) {
        // Emit tool call event
        this.emit({
          type: 'tool_call',
          data: {
            content: `ssh_exec: ${cmd.command}`,
            level: cmd.level,
            metadata: {
              tool: 'ssh_exec',
              command: cmd.command,
            },
          },
          timestamp: new Date().toISOString(),
        })
        // Emit terminal output with prompt
        this.emit({
          type: 'terminal_output',
          data: {
            content: `$ ${cmd.command}`,
            command: cmd.command,
            level: cmd.level,
          },
          timestamp: new Date().toISOString(),
        })
        // Emit command result (includes ANSI codes from PTY)
        this.emit({
          type: 'terminal_output',
          data: {
            content: cmd.output,
            level: cmd.level,
          },
          timestamp: new Date().toISOString(),
        })
      }
    }

    if (nodeName === 'validate_result' && nodeOutput.nextPassword) {
      this.emit({
        type: 'agent_message',
        data: {
          content: `Password found: ${nodeOutput.nextPassword}`,
          // validate_result does not return currentLevel, so use the tracked level.
          level: nodeOutput.currentLevel ?? currentLevel,
        },
        timestamp: new Date().toISOString(),
      })
    }

    if (nodeName === 'advance_level' && nodeOutput.currentLevel !== undefined) {
      this.emit({
        type: 'level_complete',
        data: {
          content: `Level ${nodeOutput.currentLevel - 1} completed successfully`,
          level: nodeOutput.currentLevel - 1,
        },
        timestamp: new Date().toISOString(),
      })
      this.emit({
        type: 'agent_message',
        data: {
          content: `Advancing to Level ${nodeOutput.currentLevel}`,
          level: nodeOutput.currentLevel,
        },
        timestamp: new Date().toISOString(),
      })
    }

    if (nodeOutput.error) {
      this.emit({
        type: 'error',
        data: {
          content: nodeOutput.error,
          level: nodeOutput.currentLevel ?? currentLevel,
        },
        timestamp: new Date().toISOString(),
      })
    }
  }

  /**
   * Runs the graph from `initialState` and streams events until it stops.
   *
   * Emits a 'node_update' for every node output, 'usage_update' with running totals,
   * node-specific UI events, and a final 'run_complete'. Exceptions raised while
   * streaming are reported as an 'error' event instead of being rethrown. The response,
   * if any, is always ended.
   *
   * @param initialState Initial values for the graph state.
   */
  async run(initialState: Partial<BanditAgentState>): Promise<void> {
    let finalState: BanditAgentState | null = null
    // Node outputs are partial updates, so values that nodes rarely (or only
    // incrementally) return are tracked here, seeded from the initial state.
    let currentLevel = initialState.currentLevel ?? 0
    let totalTokens = initialState.totalTokens ?? 0
    let totalCost = initialState.totalCost ?? 0

    try {
      // Stream updates using context7 recommended pattern
      const stream = await this.graph.stream(
        initialState,
        {
          streamMode: "updates", // Per context7: emit after each step
          configurable: { llm: this.llm }, // Pass LLM through config
        }
      )

      for await (const update of stream) {
        // An update maps node name -> that node's output; handle every entry.
        for (const [nodeName, nodeOutput] of Object.entries(update)) {
          // Emit each update as JSONL event
          this.emit({
            type: 'node_update',
            node: nodeName,
            data: nodeOutput,
            timestamp: new Date().toISOString(),
          })

          // Nothing further to derive from an empty output.
          if (!nodeOutput) continue

          // Track final state (last written value per key)
          finalState = { ...finalState, ...nodeOutput } as BanditAgentState
          if (typeof nodeOutput.currentLevel === 'number') {
            currentLevel = nodeOutput.currentLevel
          }

          // Emit token usage updates. Node outputs carry per-step deltas (the graph's
          // reducers sum them), so accumulate here to report running totals.
          if (nodeOutput.totalTokens || nodeOutput.totalCost) {
            totalTokens += nodeOutput.totalTokens || 0
            totalCost += nodeOutput.totalCost || 0
            this.emit({
              type: 'usage_update',
              data: { totalTokens, totalCost },
              timestamp: new Date().toISOString(),
            })
          }

          // Send specific event types based on node
          this.emitNodeEvents(nodeName, nodeOutput, currentLevel)
        }
      }

      // Final completion event with status based on final state
      const status = finalState?.status || 'complete'
      const level = currentLevel
      let message = 'Agent run completed'
      if (status === 'failed') {
        message = finalState?.error || 'Run failed'
      } else if (status === 'complete') {
        message = `Successfully completed level ${level}`
      } else {
        message = `Run ended with status: ${status}`
      }
      this.emit({
        type: 'run_complete',
        data: {
          content: message,
          status: status === 'complete' ? 'success' : 'failed',
          level,
        },
        timestamp: new Date().toISOString(),
      })
    } catch (error) {
      this.emit({
        type: 'error',
        data: { content: error instanceof Error ? error.message : String(error) },
        timestamp: new Date().toISOString(),
      })
    } finally {
      if (this.responseSender && !this.responseSender.writableEnded) {
        this.responseSender.end()
      }
    }
  }
}