import { execFileSync, spawn, type ChildProcess } from "node:child_process";
import {
  existsSync,
  mkdirSync,
  mkdtempSync,
  readFileSync,
  writeFileSync,
  copyFileSync,
  unlinkSync,
  readdirSync,
  renameSync,
} from "node:fs";
import { join } from "node:path";
import { CONFIG } from "./config.js";
import {
  ensureBridge,
  ensureNat,
  allocateIp,
  releaseIp,
  createTap,
  deleteTap,
  macFromOctet,
} from "./network.js";
import * as api from "./firecracker-api.js";

/** Persisted record describing one running agent VM (stored in agents.json). */
export interface AgentInfo {
  name: string;
  nick: string;
  model: string;
  template: string;
  ip: string;
  octet: number;
  tapDevice: string;
  socketPath: string;
  rootfsPath: string;
  pid: number;
  startedAt: string;
}

/** Shape of a template file read from the templates directory. */
interface AgentTemplate {
  name: string;
  nick: string;
  model: string;
  trigger: string;
  persona: string;
}

const AGENTS_FILE = join(CONFIG.baseDir, "agents.json");
const TEMPLATES_DIR = join(CONFIG.baseDir, "templates");
const AGENT_ROOTFS = join(CONFIG.baseDir, "agent-rootfs.ext4");
const WORKSPACES_DIR = CONFIG.workspacesDir;
const TEMPLATE_SUFFIX = ".json";

/** Writes a prefixed diagnostic line to stderr. */
function log(msg: string): void {
  process.stderr.write(`[agent-mgr] ${msg}\n`);
}

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/** Returns true when signal 0 can be delivered to `pid` (any failure counts as "gone"). */
function isAlive(pid: number): boolean {
  try {
    process.kill(pid, 0);
    return true;
  } catch {
    return false;
  }
}

/**
 * Reads the agent registry from agents.json.
 *
 * @returns The parsed registry, or an empty object when the file is missing
 *   or cannot be parsed.
 */
function loadAgents(): Record<string, AgentInfo> {
  try {
    return JSON.parse(readFileSync(AGENTS_FILE, "utf-8"));
  } catch (err) {
    // A missing file is the normal "no agents yet" case; anything else is
    // worth surfacing because the next save will overwrite the file.
    if (existsSync(AGENTS_FILE)) {
      log(`Warning: could not read ${AGENTS_FILE}, treating as empty: ${err}`);
    }
    return {};
  }
}

/**
 * Persists the agent registry.
 *
 * The data is written to a temporary sibling file and renamed over
 * agents.json so an interrupted write cannot leave a truncated registry.
 */
function saveAgents(agents: Record<string, AgentInfo>): void {
  const tmpPath = `${AGENTS_FILE}.${process.pid}.tmp`;
  writeFileSync(tmpPath, JSON.stringify(agents, null, 2));
  renameSync(tmpPath, AGENTS_FILE);
}

/**
 * Loads a template by name from the templates directory.
 *
 * @param name Template name without the `.json` extension.
 * @returns The parsed template file contents (not schema-validated).
 * @throws Error when the template file does not exist.
 */
export function loadTemplate(name: string): AgentTemplate {
  const path = join(TEMPLATES_DIR, `${name}.json`);
  if (!existsSync(path)) {
    throw new Error(`Template "${name}" not found at ${path}`);
  }
  return JSON.parse(readFileSync(path, "utf-8"));
}

/**
 * Lists available template names.
 *
 * @returns The `.json` file names in the templates directory with the
 *   extension removed, or an empty array when the directory cannot be read.
 */
export function listTemplates(): string[] {
  try {
    return readdirSync(TEMPLATES_DIR)
      .filter((f) => f.endsWith(TEMPLATE_SUFFIX))
      // Strip only the trailing extension; a ".json" earlier in the name stays.
      .map((f) => f.slice(0, -TEMPLATE_SUFFIX.length));
  } catch {
    return [];
  }
}

/**
 * Writes `content` to `path` as root by piping it through `sudo tee`.
 * The content travels over stdin, so it is never interpreted by a shell.
 */
function sudoWriteFile(path: string, content: string): void {
  execFileSync("sudo", ["tee", path], {
    input: content,
    // tee echoes its input on stdout; discard it instead of buffering it.
    stdio: ["pipe", "ignore", "pipe"],
  });
}

/**
 * Loop-mounts `imagePath` on a fresh temporary directory, runs `body` with
 * the mount point, then always unmounts and removes the directory.
 *
 * @param imagePath Filesystem image to mount.
 * @param label Short tag embedded in the temporary directory name.
 * @param body Work to perform while the image is mounted.
 */
function withLoopMount(
  imagePath: string,
  label: string,
  body: (mountPoint: string) => void
): void {
  // mkdtempSync yields a unique directory, unlike a timestamp-based path.
  const mountPoint = mkdtempSync(`/tmp/fireclaw-${label}-`);
  try {
    execFileSync("sudo", ["mount", "-o", "loop", imagePath, mountPoint], {
      stdio: "pipe",
    });
    body(mountPoint);
  } finally {
    try {
      execFileSync("sudo", ["umount", mountPoint], { stdio: "pipe" });
    } catch {
      // Best effort: the mount itself may have failed.
    }
    try {
      execFileSync("rmdir", [mountPoint], { stdio: "pipe" });
    } catch {
      // Best effort: directory may still be busy.
    }
  }
}

/**
 * Writes the agent's runtime configuration, persona and (if present) the
 * debugging SSH public key into the rootfs image at `rootfsPath`.
 *
 * @param rootfsPath Path of the per-agent rootfs image to modify.
 * @param config Values stored in /etc/agent/config.json inside the image.
 * @param persona Text stored in /etc/agent/persona.md inside the image.
 */
function injectAgentConfig(
  rootfsPath: string,
  config: { nick: string; model: string; trigger: string },
  persona: string
): void {
  withLoopMount(rootfsPath, "agent", (mountPoint) => {
    execFileSync("sudo", ["mkdir", "-p", join(mountPoint, "etc/agent")], {
      stdio: "pipe",
    });

    // Write config
    const configJson = JSON.stringify({
      nick: config.nick,
      model: config.model,
      trigger: config.trigger,
      server: "172.16.0.1",
      port: 6667,
      ollama_url: "http://172.16.0.1:11434",
    });
    sudoWriteFile(join(mountPoint, "etc/agent/config.json"), `${configJson}\n`);

    // Write persona
    sudoWriteFile(join(mountPoint, "etc/agent/persona.md"), `${persona}\n`);

    // Inject SSH key for debugging access
    execFileSync("sudo", ["mkdir", "-p", join(mountPoint, "root/.ssh")], {
      stdio: "pipe",
    });
    if (existsSync(CONFIG.sshPubKeyPath)) {
      const authorizedKeys = join(mountPoint, "root/.ssh/authorized_keys");
      execFileSync("sudo", ["cp", CONFIG.sshPubKeyPath, authorizedKeys], {
        stdio: "pipe",
      });
      execFileSync("sudo", ["chmod", "600", authorizedKeys], {
        stdio: "pipe",
      });
    }
  });
}

/**
 * Returns the path of the persistent workspace image for `agentName`,
 * creating, formatting and seeding it on first use.
 *
 * @param agentName Agent whose workspace image is requested.
 * @returns Path to `<workspacesDir>/<agentName>.ext4`.
 * @throws Whatever the underlying commands throw; a half-created image is
 *   removed first so a later call does not mistake it for a valid workspace.
 */
function ensureWorkspace(agentName: string): string {
  mkdirSync(WORKSPACES_DIR, { recursive: true });
  const imgPath = join(WORKSPACES_DIR, `${agentName}.ext4`);
  if (existsSync(imgPath)) {
    return imgPath;
  }

  log(`Creating workspace for "${agentName}" (${CONFIG.workspaceSizeMib} MiB)...`);
  try {
    execFileSync("truncate", ["-s", `${CONFIG.workspaceSizeMib}M`, imgPath], {
      stdio: "pipe",
    });
    execFileSync("sudo", ["/usr/sbin/mkfs.ext4", "-q", imgPath], {
      stdio: "pipe",
    });

    // Seed with MEMORY.md template
    withLoopMount(imgPath, "ws", (mountPoint) => {
      execFileSync("sudo", ["mkdir", "-p", join(mountPoint, "memory")], {
        stdio: "pipe",
      });
      sudoWriteFile(join(mountPoint, "MEMORY.md"), "# Agent Memory\n");
      execFileSync("sudo", ["chown", "-R", "0:0", mountPoint], {
        stdio: "pipe",
      });
    });
  } catch (err) {
    try {
      unlinkSync(imgPath);
    } catch {
      // Nothing to remove.
    }
    throw err;
  }
  return imgPath;
}

/**
 * Polls every 50 ms until `socketPath` exists, then waits a further 200 ms
 * before resolving.
 *
 * @throws Error (via rejection) when the socket has not appeared within 5 s.
 */
function waitForSocket(socketPath: string): Promise<void> {
  return new Promise((resolve, reject) => {
    const deadline = Date.now() + 5_000;
    const check = (): void => {
      if (existsSync(socketPath)) {
        setTimeout(() => resolve(), 200);
        return;
      }
      if (Date.now() > deadline) {
        reject(new Error("Firecracker socket did not appear"));
        return;
      }
      setTimeout(check, 50);
    };
    check();
  });
}

/**
 * Builds the ssh argument list used to run `command` as root on an agent VM.
 * Host key checking is disabled and the configured private key is used.
 */
function sshArgs(ip: string, connectTimeoutSec: number, command: string): string[] {
  return [
    "-o",
    "StrictHostKeyChecking=no",
    "-o",
    "UserKnownHostsFile=/dev/null",
    "-o",
    `ConnectTimeout=${connectTimeoutSec}`,
    "-i",
    CONFIG.sshKeyPath,
    `root@${ip}`,
    command,
  ];
}

/**
 * Releases the host resources recorded for an agent whose process is gone.
 * Every step is best effort so one failure does not block the others.
 */
function cleanupDeadAgent(info: AgentInfo): void {
  try {
    deleteTap(info.tapDevice);
  } catch {
    // Tap may already be gone.
  }
  try {
    releaseIp(info.octet);
  } catch {
    // IP may already be released.
  }
  try {
    unlinkSync(info.rootfsPath);
  } catch {
    // Rootfs copy may already be gone.
  }
  try {
    unlinkSync(info.socketPath);
  } catch {
    // Socket may already be gone.
  }
}

/**
 * Boots a new agent VM from a template and records it in the registry.
 *
 * @param templateName Name of the template to load.
 * @param overrides Optional overrides. `name` replaces both the agent name
 *   and the nick; `model` replaces the template's model.
 * @returns The registry record of the started agent.
 * @throws Error when the base rootfs is missing, the template is missing,
 *   an agent with the same name is already registered, or any boot step
 *   fails. On a boot failure the resources allocated so far are rolled back.
 */
export async function startAgent(
  templateName: string,
  overrides?: { name?: string; model?: string }
): Promise<AgentInfo> {
  if (!existsSync(AGENT_ROOTFS)) {
    throw new Error(
      `Agent rootfs not found at ${AGENT_ROOTFS}. Build it first.`
    );
  }

  const template = loadTemplate(templateName);
  const name = overrides?.name ?? template.name;
  const nick = overrides?.name ?? template.nick;
  const model = overrides?.model ?? template.model;

  // Check not already running
  if (loadAgents()[name]) {
    throw new Error(`Agent "${name}" is already running`);
  }

  log(`Starting agent "${name}" (template: ${templateName})...`);

  // Allocate resources
  const { ip, octet } = allocateIp();
  const tapDevice = `fctap${octet}`;
  const socketPath = join(CONFIG.socketDir, `agent-${name}.sock`);
  const rootfsPath = join(CONFIG.runsDir, `agent-${name}.ext4`);

  let tapCreated = false;
  let proc: ChildProcess | undefined;

  try {
    mkdirSync(CONFIG.socketDir, { recursive: true });
    mkdirSync(CONFIG.runsDir, { recursive: true });

    // Prepare rootfs
    copyFileSync(AGENT_ROOTFS, rootfsPath);
    injectAgentConfig(
      rootfsPath,
      { nick, model, trigger: template.trigger },
      template.persona
    );

    // Create/get persistent workspace
    const workspacePath = ensureWorkspace(name);

    // Setup network
    ensureBridge();
    ensureNat();
    createTap(tapDevice);
    tapCreated = true;

    // Boot VM
    // NOTE(review): stdio is "pipe" but nothing reads the pipes here —
    // confirm that firecracker output cannot fill them.
    proc = spawn(CONFIG.firecrackerBin, ["--api-sock", socketPath], {
      stdio: "pipe",
      detached: true,
    });
    // Without a listener a failed spawn would surface as an uncaught
    // 'error' event and take the whole manager down.
    proc.on("error", (err) => {
      log(`Firecracker process error for "${name}": ${err.message}`);
    });
    proc.unref();

    const pid = proc.pid;
    if (pid === undefined) {
      throw new Error(`Failed to spawn ${CONFIG.firecrackerBin}`);
    }

    await waitForSocket(socketPath);

    const bootArgs = [
      "console=ttyS0",
      "reboot=k",
      "panic=1",
      "pci=off",
      "root=/dev/vda",
      "rw",
      `ip=${ip}::${CONFIG.bridge.gateway}:${CONFIG.bridge.netmask}::eth0:off`,
    ].join(" ");

    await api.putBootSource(socketPath, CONFIG.kernelPath, bootArgs);
    await api.putDrive(socketPath, "rootfs", rootfsPath);
    await api.putDrive(socketPath, "workspace", workspacePath, false, false);
    await api.putNetworkInterface(
      socketPath,
      "eth0",
      tapDevice,
      macFromOctet(octet)
    );
    await api.putMachineConfig(
      socketPath,
      CONFIG.vm.vcpuCount,
      CONFIG.vm.memSizeMib
    );
    await api.startInstance(socketPath);

    const info: AgentInfo = {
      name,
      nick,
      model,
      template: templateName,
      ip,
      octet,
      tapDevice,
      socketPath,
      rootfsPath,
      pid,
      startedAt: new Date().toISOString(),
    };

    // Re-read the registry: other agents may have been started or stopped
    // while this function was awaiting, and a stale snapshot would drop them.
    const agents = loadAgents();
    agents[name] = info;
    saveAgents(agents);

    log(`Agent "${name}" started: nick=${nick} ip=${ip}`);
    return info;
  } catch (err) {
    // Roll back everything allocated so far; each step is best effort.
    if (proc?.pid !== undefined) {
      try {
        process.kill(proc.pid, "SIGKILL");
      } catch {
        // Process already gone.
      }
    }
    try {
      unlinkSync(socketPath);
    } catch {
      // Socket was never created.
    }
    if (tapCreated) {
      try {
        deleteTap(tapDevice);
      } catch {
        // Tap may still be held by the dying process.
      }
    }
    try {
      releaseIp(octet);
    } catch {
      // Nothing more can be done here.
    }
    try {
      unlinkSync(rootfsPath);
    } catch {
      // Rootfs copy was never created.
    }
    throw err;
  }
}

/**
 * Stops a registered agent: asks the in-VM agent process to exit over SSH,
 * kills the firecracker process, releases host resources and removes the
 * agent from the registry.
 *
 * @param name Name of the agent to stop.
 * @throws Error when no agent with that name is registered.
 */
export async function stopAgent(name: string): Promise<void> {
  const info = loadAgents()[name];
  if (!info) {
    throw new Error(`Agent "${name}" is not running`);
  }

  log(`Stopping agent "${name}"...`);

  // Graceful shutdown: SSH in and kill the agent process so it sends IRC QUIT
  try {
    execFileSync(
      "ssh",
      sshArgs(info.ip, 3, "killall python3 2>/dev/null; sleep 1"),
      { stdio: "pipe", timeout: 5_000 }
    );
  } catch {
    // Best effort — VM might already be unreachable
  }

  // Kill firecracker process and wait for it to die
  try {
    process.kill(info.pid, "SIGKILL");
    // Wait (up to 20 × 200 ms) for the process to exit before cleaning up
    for (let i = 0; i < 20 && isAlive(info.pid); i++) {
      await sleep(200);
    }
  } catch {
    // Already dead
  }

  // Small delay to let kernel release the tap device
  await sleep(500);

  // Cleanup with retry for tap
  try {
    unlinkSync(info.socketPath);
  } catch {
    // Socket already removed.
  }
  for (let attempt = 0; attempt < 3; attempt++) {
    try {
      deleteTap(info.tapDevice);
      break;
    } catch {
      if (attempt < 2) await sleep(1000);
    }
  }
  releaseIp(info.octet);
  try {
    unlinkSync(info.rootfsPath);
  } catch {
    // Rootfs copy already removed.
  }

  // Re-read the registry so changes made while this function was awaiting
  // are not overwritten by a stale snapshot.
  const agents = loadAgents();
  delete agents[name];
  saveAgents(agents);

  log(`Agent "${name}" stopped.`);
}

/**
 * Lists registered agents whose firecracker process is still alive.
 * Entries whose process is gone are cleaned up and removed from the registry.
 */
export function listAgents(): AgentInfo[] {
  const agents = loadAgents();

  // Verify processes are still alive
  for (const [name, info] of Object.entries(agents)) {
    if (isAlive(info.pid)) continue;
    log(`Agent "${name}" is dead, cleaning up...`);
    cleanupDeadAgent(info);
    delete agents[name];
  }

  saveAgents(agents);
  return Object.values(agents);
}

/**
 * Applies configuration changes to a running agent over SSH and signals the
 * in-VM agent process (SIGHUP to python3) to reload.
 *
 * @param name Name of the agent to reload.
 * @param updates Fields to change; falsy/omitted fields are left untouched.
 *   Only `model` is also recorded in the registry entry.
 * @throws Error when the agent is not registered or any SSH step fails.
 */
export async function reloadAgent(
  name: string,
  updates: { model?: string; persona?: string; trigger?: string }
): Promise<void> {
  const agents = loadAgents();
  const info = agents[name];
  if (!info) {
    throw new Error(`Agent "${name}" is not running`);
  }

  log(`Reloading agent "${name}"...`);

  // Build updated config
  const configUpdates: Record<string, string> = {};
  if (updates.model) {
    configUpdates.model = updates.model;
    info.model = updates.model;
  }
  if (updates.trigger) configUpdates.trigger = updates.trigger;

  try {
    if (Object.keys(configUpdates).length > 0) {
      // Read current config from VM
      const currentRaw = execFileSync(
        "ssh",
        sshArgs(info.ip, 5, "cat /etc/agent/config.json"),
        { encoding: "utf-8", timeout: 10_000 }
      );
      const current = JSON.parse(currentRaw);
      Object.assign(current, configUpdates);
      const newConfig = JSON.stringify(current);

      // Write back via stdin
      execFileSync("ssh", sshArgs(info.ip, 5, `cat > /etc/agent/config.json`), {
        input: newConfig,
        timeout: 10_000,
      });
    }

    if (updates.persona) {
      execFileSync("ssh", sshArgs(info.ip, 5, `cat > /etc/agent/persona.md`), {
        input: updates.persona,
        timeout: 10_000,
      });
    }

    // Signal agent to reload
    execFileSync("ssh", sshArgs(info.ip, 5, "killall -HUP python3"), {
      stdio: "pipe",
      timeout: 10_000,
    });
  } catch (err) {
    throw new Error(`Failed to reload agent: ${err}`);
  }

  saveAgents(agents);
  log(`Agent "${name}" reloaded.`);
}

/**
 * Reconciles the registry with the processes actually running.
 *
 * Registered agents whose process is alive are reported as adopted; dead
 * ones are cleaned up and removed. Firecracker processes whose command line
 * names an `agent-<name>.sock` socket that is not in the registry are killed
 * and reported as `orphan:<name>`.
 *
 * @returns Names of adopted agents and of cleaned entries.
 */
export function reconcileAgents(): { adopted: string[]; cleaned: string[] } {
  const agents = loadAgents();
  const adopted: string[] = [];
  const cleaned: string[] = [];

  for (const [name, info] of Object.entries(agents)) {
    if (isAlive(info.pid)) {
      adopted.push(name);
      log(`Adopted running agent "${name}" (PID ${info.pid}, ${info.ip})`);
    } else {
      log(`Cleaning dead agent "${name}" (PID ${info.pid} gone)...`);
      // Clean up resources from dead agent
      cleanupDeadAgent(info);
      delete agents[name];
      cleaned.push(name);
    }
  }

  // Scan for orphan firecracker processes not in agents.json
  try {
    const psOutput = execFileSync("pgrep", ["-a", "firecracker"], {
      encoding: "utf-8",
    });
    for (const line of psOutput.trim().split("\n")) {
      if (!line) continue;
      const agentName = line.match(/agent-(\S+)\.sock/)?.[1];
      if (!agentName || agents[agentName]) continue;

      const pid = Number.parseInt(line.split(/\s+/)[0] ?? "", 10);
      // Never signal pid 0 (the caller's process group) or a non-numeric pid.
      if (!Number.isInteger(pid) || pid <= 0) continue;

      log(`Found orphan firecracker process for "${agentName}" (PID ${pid}), killing...`);
      try {
        process.kill(pid, "SIGKILL");
      } catch {
        // Process exited in the meantime.
      }
      cleaned.push(`orphan:${agentName}`);
    }
  } catch {
    // No firecracker processes running — that's fine
  }

  saveAgents(agents);

  if (adopted.length === 0 && cleaned.length === 0) {
    log("No agents to reconcile.");
  } else {
    log(`Reconciled: ${adopted.length} adopted, ${cleaned.length} cleaned.`);
  }

  return { adopted, cleaned };
}

/**
 * Stops every registered agent, one after another.
 *
 * A failure to stop one agent does not prevent the others from being
 * stopped.
 *
 * @throws Error summarising every failure once all agents have been tried.
 */
export async function stopAllAgents(): Promise<void> {
  const failures: string[] = [];
  for (const name of Object.keys(loadAgents())) {
    try {
      await stopAgent(name);
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      log(`Failed to stop agent "${name}": ${reason}`);
      failures.push(`${name}: ${reason}`);
    }
  }
  if (failures.length > 0) {
    throw new Error(
      `Failed to stop ${failures.length} agent(s): ${failures.join("; ")}`
    );
  }
}