Files
fireclaw/src/agent-manager.ts
ansible ff694d12f6 Initial commit — fireclaw multi-agent system
Firecracker microVM-based multi-agent system with IRC orchestration and local LLMs.

Features:
- Ephemeral command runner with VM snapshots (~1.1s)
- Multi-agent orchestration via overseer IRC bot
- 5 agent templates (worker, coder, researcher, quick, creative)
- Tool access (shell + podman containers inside VMs)
- Persistent workspace + memory system (MEMORY.md pattern)
- Agent hot-reload (model/persona swap via SSH + SIGHUP)
- Non-root agents, graceful shutdown, crash recovery
- Agent-to-agent communication via IRC
- DM support, /invite support
- Systemd service, 20 regression tests

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 13:28:29 +00:00

558 lines
14 KiB
TypeScript

import { spawn, type ChildProcess } from "node:child_process";
import {
existsSync,
mkdirSync,
readFileSync,
writeFileSync,
copyFileSync,
unlinkSync,
readdirSync,
} from "node:fs";
import { join } from "node:path";
import { execFileSync } from "node:child_process";
import { CONFIG } from "./config.js";
import {
ensureBridge,
ensureNat,
allocateIp,
releaseIp,
createTap,
deleteTap,
macFromOctet,
} from "./network.js";
import * as api from "./firecracker-api.js";
/**
 * Runtime record for one started agent VM.
 *
 * Instances are built by `startAgent` and persisted as the values of the
 * JSON registry stored at `AGENTS_FILE`, keyed by `name`.
 */
export interface AgentInfo {
/** Registry key for the agent; also used to derive its socket, rootfs and workspace file names. */
name: string;
/** Nick written into the agent's in-VM `config.json`. */
nick: string;
/** Model identifier written into the agent's in-VM `config.json`. */
model: string;
/** Name of the template the agent was started from. */
template: string;
/** Address returned by `allocateIp()`; used as the VM's boot-time IP and as the SSH target. */
ip: string;
/** Octet returned by `allocateIp()`; used to derive the tap device name and MAC, and passed to `releaseIp()`. */
octet: number;
/** Host tap device name (`fctap<octet>`). */
tapDevice: string;
/** Path of the Firecracker API socket for this VM. */
socketPath: string;
/** Path of this agent's private copy of the base rootfs image. */
rootfsPath: string;
/** PID of the spawned Firecracker process; probed with signal 0 to check liveness. */
pid: number;
/** ISO-8601 timestamp taken when the agent was registered. */
startedAt: string;
}
/**
 * Shape of a template file (`<TEMPLATES_DIR>/<name>.json`) as consumed by
 * `startAgent`. The file contents are parsed with `JSON.parse` and are not
 * validated against this interface at runtime.
 */
interface AgentTemplate {
/** Default agent name, used when no `name` override is supplied. */
name: string;
/** Default nick, used when no `name` override is supplied. */
nick: string;
/** Default model, used when no `model` override is supplied. */
model: string;
/** Written verbatim to the agent's `config.json` as `trigger`; interpreted by the in-VM agent (not visible here). */
trigger: string;
/** Text written to `/etc/agent/persona.md` inside the agent's rootfs. */
persona: string;
}
// JSON registry of agents, read by loadAgents() and written by saveAgents().
const AGENTS_FILE = join(CONFIG.baseDir, "agents.json");
// Directory holding one `<template>.json` file per agent template.
const TEMPLATES_DIR = join(CONFIG.baseDir, "templates");
// Base rootfs image; startAgent() copies it for every agent it boots.
const AGENT_ROOTFS = join(CONFIG.baseDir, "agent-rootfs.ext4");
// Directory holding one `<agent>.ext4` workspace image per agent (see ensureWorkspace()).
const WORKSPACES_DIR = CONFIG.workspacesDir;
/**
 * Emits one diagnostic line on stderr, prefixed with the `[agent-mgr]` tag.
 *
 * @param msg - Text to print; a newline is appended.
 */
function log(msg: string) {
  const line = "[agent-mgr] " + msg + "\n";
  process.stderr.write(line);
}
/**
 * Reads the agent registry from `AGENTS_FILE`.
 *
 * Never throws. A missing file is the normal "no agents yet" state and
 * yields an empty registry silently. Any other problem (unreadable file,
 * invalid JSON, or JSON that is not a plain object) also yields an empty
 * registry, but is logged so that a corrupted registry does not go
 * unnoticed before the next `saveAgents()` overwrites it.
 *
 * @returns The registry keyed by agent name, or `{}` when unavailable.
 */
function loadAgents(): Record<string, AgentInfo> {
  let raw: string;
  try {
    raw = readFileSync(AGENTS_FILE, "utf-8");
  } catch (err: unknown) {
    const code = (err as { code?: unknown } | null)?.code;
    if (code !== "ENOENT") {
      log(`Could not read ${AGENTS_FILE}: ${String(err)}`);
    }
    return {};
  }

  try {
    const parsed: unknown = JSON.parse(raw);
    // Callers index the result by name, so anything but a plain object
    // (e.g. `null`, an array, a number) would crash them later.
    if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
      return parsed as Record<string, AgentInfo>;
    }
    log(`Ignoring ${AGENTS_FILE}: expected a JSON object`);
  } catch (err: unknown) {
    log(`Ignoring ${AGENTS_FILE}: invalid JSON (${String(err)})`);
  }
  return {};
}
/**
 * Persists the agent registry to `AGENTS_FILE` as 2-space-indented JSON,
 * replacing whatever the file held before.
 *
 * @param agents - Registry keyed by agent name.
 */
function saveAgents(agents: Record<string, AgentInfo>) {
  const serialized = JSON.stringify(agents, null, 2);
  writeFileSync(AGENTS_FILE, serialized);
}
/**
 * Loads the template stored at `<TEMPLATES_DIR>/<name>.json`.
 *
 * The parsed JSON is returned as-is; it is not validated against
 * `AgentTemplate`.
 *
 * @param name - Template name (file name without the `.json` extension).
 * @returns The parsed template.
 * @throws Error if the template file does not exist; `JSON.parse` /
 *   `readFileSync` errors propagate unchanged.
 */
export function loadTemplate(name: string): AgentTemplate {
  const templateFile = join(TEMPLATES_DIR, name + ".json");
  if (existsSync(templateFile)) {
    const raw = readFileSync(templateFile, "utf-8");
    return JSON.parse(raw);
  }
  throw new Error(`Template "${name}" not found at ${templateFile}`);
}
/**
 * Lists the names of all templates found in `TEMPLATES_DIR`.
 *
 * A template is any directory entry whose name ends in `.json`; the name
 * returned is the file name with exactly that trailing extension removed.
 *
 * @returns Template names, or an empty array if the directory cannot be read.
 */
export function listTemplates(): string[] {
  const suffix = ".json";
  try {
    return readdirSync(TEMPLATES_DIR)
      .filter((file) => file.endsWith(suffix))
      // Strip only the trailing extension. `replace(".json", "")` would
      // remove the FIRST occurrence and mangle names like "a.jsonl.json".
      .map((file) => file.slice(0, -suffix.length));
  } catch {
    return [];
  }
}
/**
 * Loop-mounts an agent rootfs image and writes the agent's configuration
 * into it:
 *
 * - `/etc/agent/config.json` — nick, model, trigger plus fixed `server`,
 *   `port` and `ollama_url` values (presumably the host-side services on the
 *   bridge gateway — confirm against the in-VM agent).
 * - `/etc/agent/persona.md` — the persona text.
 * - `/root/.ssh/authorized_keys` — copy of `CONFIG.sshPubKeyPath`, when that
 *   file exists, for debugging access.
 *
 * File contents are streamed to `sudo tee` over stdin instead of being
 * interpolated into a `bash -c` string, so quotes, `$`, backticks or a
 * heredoc-terminator line inside the nick/model/trigger/persona can neither
 * corrupt the files nor execute as root.
 *
 * The image is always unmounted and the temporary mount point removed
 * (both best-effort) before returning or throwing.
 *
 * @param rootfsPath - Path of the ext4 image to modify.
 * @param config - Values for the agent's `config.json`.
 * @param persona - Persona text for `persona.md`.
 * @throws If any `sudo` command (mount, mkdir, tee, cp, chmod) fails.
 */
function injectAgentConfig(
  rootfsPath: string,
  config: { nick: string; model: string; trigger: string },
  persona: string
) {
  const mountPoint = `/tmp/fireclaw-agent-${Date.now()}`;
  const agentDir = join(mountPoint, "etc/agent");
  const sshDir = join(mountPoint, "root/.ssh");
  const authorizedKeys = join(sshDir, "authorized_keys");

  // Runs a privileged command without going through a shell.
  const sudo = (args: string[]): void => {
    execFileSync("sudo", args, { stdio: "pipe" });
  };
  // Writes `content` to a root-owned path. tee's echo on stdout is discarded
  // so large content cannot overflow execFileSync's output buffer.
  const sudoWrite = (target: string, content: string): void => {
    execFileSync("sudo", ["tee", target], {
      input: content,
      stdio: ["pipe", "ignore", "pipe"],
    });
  };

  mkdirSync(mountPoint, { recursive: true });
  try {
    sudo(["mount", "-o", "loop", rootfsPath, mountPoint]);
    sudo(["mkdir", "-p", agentDir]);

    // Write config
    const configJson = JSON.stringify({
      nick: config.nick,
      model: config.model,
      trigger: config.trigger,
      server: "172.16.0.1",
      port: 6667,
      ollama_url: "http://172.16.0.1:11434",
    });
    // Trailing newline kept to match what `echo` used to produce.
    sudoWrite(join(agentDir, "config.json"), `${configJson}\n`);

    // Write persona (trailing newline matches the former heredoc output)
    sudoWrite(join(agentDir, "persona.md"), `${persona}\n`);

    // Inject SSH key for debugging access
    sudo(["mkdir", "-p", sshDir]);
    if (existsSync(CONFIG.sshPubKeyPath)) {
      sudo(["cp", CONFIG.sshPubKeyPath, authorizedKeys]);
      sudo(["chmod", "600", authorizedKeys]);
    }
  } finally {
    // Best effort: the mount may never have succeeded.
    try {
      execFileSync("sudo", ["umount", mountPoint], { stdio: "pipe" });
    } catch {}
    try {
      execFileSync("rmdir", [mountPoint], { stdio: "pipe" });
    } catch {}
  }
}
/**
 * Returns the path of the persistent workspace image for an agent, creating
 * it on first use.
 *
 * Creation allocates a sparse file of `CONFIG.workspaceSizeMib` MiB, formats
 * it as ext4, and seeds it with an empty `memory/` directory and a
 * `MEMORY.md` file, all owned by root (0:0).
 *
 * If any creation step fails, the partially built image is deleted before
 * the error is rethrown. Otherwise the next call would find the file,
 * assume it is a valid workspace, and hand an unformatted or unseeded image
 * to the VM.
 *
 * @param agentName - Agent name; the image is `<WORKSPACES_DIR>/<agentName>.ext4`.
 * @returns Path of the workspace image.
 * @throws If creating, formatting or seeding a new image fails.
 */
function ensureWorkspace(agentName: string): string {
  mkdirSync(WORKSPACES_DIR, { recursive: true });
  const imgPath = join(WORKSPACES_DIR, `${agentName}.ext4`);
  if (existsSync(imgPath)) {
    return imgPath;
  }

  log(`Creating workspace for "${agentName}" (${CONFIG.workspaceSizeMib} MiB)...`);
  try {
    execFileSync("truncate", ["-s", `${CONFIG.workspaceSizeMib}M`, imgPath], {
      stdio: "pipe",
    });
    execFileSync("sudo", ["/usr/sbin/mkfs.ext4", "-q", imgPath], {
      stdio: "pipe",
    });

    // Seed with MEMORY.md template
    const mountPoint = `/tmp/fireclaw-ws-${Date.now()}`;
    mkdirSync(mountPoint, { recursive: true });
    try {
      execFileSync("sudo", ["mount", "-o", "loop", imgPath, mountPoint], {
        stdio: "pipe",
      });
      execFileSync("sudo", ["mkdir", "-p", join(mountPoint, "memory")], {
        stdio: "pipe",
      });
      // Content goes through stdin; tee's stdout echo is discarded.
      execFileSync("sudo", ["tee", join(mountPoint, "MEMORY.md")], {
        input: "# Agent Memory\n",
        stdio: ["pipe", "ignore", "pipe"],
      });
      execFileSync("sudo", ["chown", "-R", "0:0", mountPoint], {
        stdio: "pipe",
      });
    } finally {
      // Best effort: the mount may never have succeeded.
      try { execFileSync("sudo", ["umount", mountPoint], { stdio: "pipe" }); } catch {}
      try { execFileSync("rmdir", [mountPoint], { stdio: "pipe" }); } catch {}
    }
  } catch (err) {
    // Do not leave a half-built image behind to be mistaken for a workspace.
    try { unlinkSync(imgPath); } catch {}
    throw err;
  }
  return imgPath;
}
/**
 * Waits for a file to appear at `socketPath`.
 *
 * The path is polled every 50 ms. Once it exists the promise resolves after
 * a further 200 ms; if it has not appeared 5 s after the call, the promise
 * rejects.
 *
 * @param socketPath - Path of the Firecracker API socket to wait for.
 * @returns A promise that resolves (with no value) once the path exists.
 */
function waitForSocket(socketPath: string): Promise<void> {
  const pollIntervalMs = 50;
  const settleDelayMs = 200;
  const timeoutMs = 5_000;

  return new Promise<void>((resolve, reject) => {
    const deadline = Date.now() + timeoutMs;

    function poll(): void {
      if (!existsSync(socketPath)) {
        if (Date.now() > deadline) {
          reject(new Error("Firecracker socket did not appear"));
        } else {
          setTimeout(poll, pollIntervalMs);
        }
        return;
      }
      setTimeout(resolve, settleDelayMs);
    }

    poll();
  });
}
/**
 * Boots a new agent in its own Firecracker microVM and registers it.
 *
 * Steps: load the template, allocate an IP, copy the base rootfs and inject
 * the agent config into it, make sure the persistent workspace image exists,
 * set up bridge/NAT/tap networking, spawn Firecracker, configure the VM over
 * its API socket, start it, and record the agent in the registry.
 *
 * If anything fails after the IP has been allocated, everything acquired so
 * far (Firecracker process, API socket, tap device, IP, rootfs copy) is
 * released on a best-effort basis before the original error is rethrown, so
 * a failed start leaks no host resources. The persistent workspace image is
 * intentionally left in place.
 *
 * @param templateName - Name of the template to start from.
 * @param overrides - Optional overrides. `name` replaces both the template's
 *   agent name and its nick; `model` replaces the template's model.
 * @returns The registry record of the started agent.
 * @throws Error if the base rootfs is missing, the template cannot be
 *   loaded, an agent with the same name is already registered, or any
 *   provisioning/boot step fails.
 */
export async function startAgent(
  templateName: string,
  overrides?: { name?: string; model?: string }
): Promise<AgentInfo> {
  if (!existsSync(AGENT_ROOTFS)) {
    throw new Error(
      `Agent rootfs not found at ${AGENT_ROOTFS}. Build it first.`
    );
  }
  const template = loadTemplate(templateName);
  const name = overrides?.name ?? template.name;
  const nick = overrides?.name ?? template.nick;
  const model = overrides?.model ?? template.model;

  // Check not already running
  const agents = loadAgents();
  if (agents[name]) {
    throw new Error(`Agent "${name}" is already running`);
  }
  log(`Starting agent "${name}" (template: ${templateName})...`);

  // Allocate resources
  const { ip, octet } = allocateIp();
  const tapDevice = `fctap${octet}`;
  const socketPath = join(CONFIG.socketDir, `agent-${name}.sock`);
  const rootfsPath = join(CONFIG.runsDir, `agent-${name}.ext4`);

  let proc: ChildProcess | undefined;
  let tapAttempted = false;

  try {
    mkdirSync(CONFIG.socketDir, { recursive: true });
    mkdirSync(CONFIG.runsDir, { recursive: true });

    // Prepare rootfs
    copyFileSync(AGENT_ROOTFS, rootfsPath);
    injectAgentConfig(
      rootfsPath,
      { nick, model, trigger: template.trigger },
      template.persona
    );

    // Create/get persistent workspace
    const workspacePath = ensureWorkspace(name);

    // Setup network
    ensureBridge();
    ensureNat();
    tapAttempted = true;
    createTap(tapDevice);

    // A socket file left over from a crashed run would make waitForSocket()
    // return immediately, before this Firecracker instance is listening.
    try {
      unlinkSync(socketPath);
    } catch {}

    // Boot VM
    proc = spawn(
      CONFIG.firecrackerBin,
      ["--api-sock", socketPath],
      { stdio: "pipe", detached: true }
    );
    // Without a listener, a spawn failure surfaces as an unhandled 'error'
    // event and takes the whole manager down.
    proc.on("error", (err) => {
      log(`Firecracker process error for "${name}": ${err.message}`);
    });
    proc.unref();
    const pid = proc.pid;
    if (pid === undefined) {
      // Node leaves pid undefined when the child could not be spawned.
      throw new Error(`Failed to spawn ${CONFIG.firecrackerBin}`);
    }

    await waitForSocket(socketPath);

    const bootArgs = [
      "console=ttyS0",
      "reboot=k",
      "panic=1",
      "pci=off",
      "root=/dev/vda",
      "rw",
      `ip=${ip}::${CONFIG.bridge.gateway}:${CONFIG.bridge.netmask}::eth0:off`,
    ].join(" ");
    await api.putBootSource(socketPath, CONFIG.kernelPath, bootArgs);
    await api.putDrive(socketPath, "rootfs", rootfsPath);
    await api.putDrive(socketPath, "workspace", workspacePath, false, false);
    await api.putNetworkInterface(
      socketPath,
      "eth0",
      tapDevice,
      macFromOctet(octet)
    );
    await api.putMachineConfig(
      socketPath,
      CONFIG.vm.vcpuCount,
      CONFIG.vm.memSizeMib
    );
    await api.startInstance(socketPath);

    const info: AgentInfo = {
      name,
      nick,
      model,
      template: templateName,
      ip,
      octet,
      tapDevice,
      socketPath,
      rootfsPath,
      pid,
      startedAt: new Date().toISOString(),
    };
    agents[name] = info;
    saveAgents(agents);
    log(`Agent "${name}" started: nick=${nick} ip=${ip}`);
    return info;
  } catch (err) {
    log(`Agent "${name}" failed to start, releasing its resources...`);

    const pid = proc?.pid;
    if (pid !== undefined) {
      try {
        process.kill(pid, "SIGKILL");
        // Give the process a moment to exit so the tap device is free.
        for (let i = 0; i < 10; i++) {
          try {
            process.kill(pid, 0);
          } catch {
            break; // Process is gone
          }
          await new Promise((r) => setTimeout(r, 100));
        }
      } catch {
        // Already dead
      }
    }
    try {
      unlinkSync(socketPath);
    } catch {}
    if (tapAttempted) {
      try {
        deleteTap(tapDevice);
      } catch {}
    }
    try {
      releaseIp(octet);
    } catch {}
    try {
      unlinkSync(rootfsPath);
    } catch {}

    throw err;
  }
}
/**
 * Stops a registered agent and releases everything it was using.
 *
 * Sequence: ask the in-VM agent to exit over SSH (best effort), SIGKILL the
 * Firecracker process and wait up to ~4 s for it to disappear, pause 500 ms,
 * then remove the API socket, the tap device (up to three attempts, 1 s
 * apart), the IP allocation and the rootfs copy, and finally drop the agent
 * from the registry.
 *
 * @param name - Registry name of the agent to stop.
 * @throws Error if no agent with that name is registered.
 */
export async function stopAgent(name: string) {
  const pause = (ms: number) => new Promise((done) => setTimeout(done, ms));

  const registry = loadAgents();
  const agent = registry[name];
  if (!agent) {
    throw new Error(`Agent "${name}" is not running`);
  }
  log(`Stopping agent "${name}"...`);

  // Graceful shutdown: SSH in and kill the agent process so it sends IRC QUIT
  const sshArgs = [
    "-o", "StrictHostKeyChecking=no",
    "-o", "UserKnownHostsFile=/dev/null",
    "-o", "ConnectTimeout=3",
    "-i", CONFIG.sshKeyPath,
    `root@${agent.ip}`,
    "killall python3 2>/dev/null; sleep 1",
  ];
  try {
    execFileSync("ssh", sshArgs, { stdio: "pipe", timeout: 5_000 });
  } catch {
    // Best effort — the VM might already be unreachable
  }

  // Kill firecracker, then poll until the PID no longer exists
  try {
    process.kill(agent.pid, "SIGKILL");
    let polls = 0;
    while (polls < 20) {
      try {
        process.kill(agent.pid, 0); // throws once the process is gone
      } catch {
        break;
      }
      await pause(200);
      polls += 1;
    }
  } catch {
    // Already dead
  }

  // Small delay to let the kernel release the tap device
  await pause(500);

  try {
    unlinkSync(agent.socketPath);
  } catch {}

  // Tap removal is retried: up to three attempts, one second apart
  let attemptsLeft = 3;
  while (attemptsLeft > 0) {
    attemptsLeft -= 1;
    try {
      deleteTap(agent.tapDevice);
      break;
    } catch {
      if (attemptsLeft > 0) await pause(1000);
    }
  }

  releaseIp(agent.octet);
  try {
    unlinkSync(agent.rootfsPath);
  } catch {}

  delete registry[name];
  saveAgents(registry);
  log(`Agent "${name}" stopped.`);
}
/**
 * Returns the registered agents whose Firecracker process still exists.
 *
 * Each registered PID is probed with signal 0. Agents whose process is gone
 * have their tap device, IP allocation, rootfs copy and socket removed
 * (each step best-effort) and are dropped from the registry, which is then
 * written back to disk.
 *
 * @returns Records of the agents that are still alive.
 */
export function listAgents(): AgentInfo[] {
  const registry = loadAgents();
  const quietly = (step: () => void): void => {
    try {
      step();
    } catch {}
  };

  for (const [agentName, entry] of Object.entries(registry)) {
    let alive = true;
    try {
      process.kill(entry.pid, 0);
    } catch {
      alive = false;
    }
    if (alive) continue;

    // Process is dead, clean up
    log(`Agent "${agentName}" is dead, cleaning up...`);
    quietly(() => deleteTap(entry.tapDevice));
    quietly(() => releaseIp(entry.octet));
    quietly(() => unlinkSync(entry.rootfsPath));
    quietly(() => unlinkSync(entry.socketPath));
    delete registry[agentName];
  }

  saveAgents(registry);
  return Object.values(registry);
}
/**
 * Applies model/trigger/persona changes to a running agent and signals it
 * to reload.
 *
 * Model and trigger changes are merged into the VM's
 * `/etc/agent/config.json` (read and rewritten over SSH); a persona change
 * replaces `/etc/agent/persona.md`. Afterwards `python3` inside the VM is
 * sent SIGHUP and the registry is saved (carrying the new model, if one was
 * given). Empty-string values in `updates` are treated as "no change".
 *
 * @param name - Registry name of the agent to reload.
 * @param updates - Fields to change; omitted fields are left untouched.
 * @throws Error if the agent is not registered, or (wrapped as
 *   "Failed to reload agent: …") if any SSH step fails.
 */
export async function reloadAgent(
  name: string,
  updates: { model?: string; persona?: string; trigger?: string }
) {
  const registry = loadAgents();
  const agent = registry[name];
  if (!agent) {
    throw new Error(`Agent "${name}" is not running`);
  }
  log(`Reloading agent "${name}"...`);

  // Collect the config.json fields that change
  const changes: Record<string, string> = {};
  if (updates.model) {
    changes.model = updates.model;
    agent.model = updates.model;
  }
  if (updates.trigger) {
    changes.trigger = updates.trigger;
  }

  // Builds the ssh argv for running one command inside the VM
  const remote = (command: string): string[] => [
    "-o", "StrictHostKeyChecking=no",
    "-o", "UserKnownHostsFile=/dev/null",
    "-o", "ConnectTimeout=5",
    "-i", CONFIG.sshKeyPath,
    `root@${agent.ip}`,
    command,
  ];

  try {
    const hasConfigChanges = Object.keys(changes).length > 0;
    if (hasConfigChanges) {
      // Read current config from VM, merge, and write it back via stdin
      const existingRaw = execFileSync(
        "ssh",
        remote("cat /etc/agent/config.json"),
        { encoding: "utf-8", timeout: 10_000 }
      );
      const merged = JSON.parse(existingRaw);
      for (const [key, value] of Object.entries(changes)) {
        merged[key] = value;
      }
      execFileSync("ssh", remote(`cat > /etc/agent/config.json`), {
        input: JSON.stringify(merged),
        timeout: 10_000,
      });
    }

    if (updates.persona) {
      execFileSync("ssh", remote(`cat > /etc/agent/persona.md`), {
        input: updates.persona,
        timeout: 10_000,
      });
    }

    // Signal agent to reload
    execFileSync("ssh", remote("killall -HUP python3"), {
      stdio: "pipe",
      timeout: 10_000,
    });
  } catch (err) {
    throw new Error(`Failed to reload agent: ${err}`);
  }

  saveAgents(registry);
  log(`Agent "${name}" reloaded.`);
}
/**
 * Brings the registry back in line with the processes actually running.
 *
 * 1. Every registered agent whose PID still exists is "adopted" (kept).
 *    Agents whose PID is gone have their tap, IP, rootfs copy and socket
 *    removed (best-effort) and are dropped from the registry.
 * 2. `pgrep -a firecracker` is scanned for processes whose command line
 *    mentions an `agent-<name>.sock` socket for a name that is not in the
 *    registry; those are SIGKILLed and reported as `orphan:<name>`.
 *
 * The registry is saved afterwards and a summary is logged.
 *
 * @returns Names of adopted agents, and of cleaned agents/orphans.
 */
export function reconcileAgents(): { adopted: string[]; cleaned: string[] } {
  const registry = loadAgents();
  const adopted: string[] = [];
  const cleaned: string[] = [];

  const pidExists = (pid: number): boolean => {
    try {
      process.kill(pid, 0);
      return true;
    } catch {
      return false; // Process is dead
    }
  };
  const quietly = (step: () => void): void => {
    try {
      step();
    } catch {}
  };

  for (const [agentName, entry] of Object.entries(registry)) {
    if (pidExists(entry.pid)) {
      adopted.push(agentName);
      log(`Adopted running agent "${agentName}" (PID ${entry.pid}, ${entry.ip})`);
      continue;
    }

    log(`Cleaning dead agent "${agentName}" (PID ${entry.pid} gone)...`);
    // Release whatever the dead agent left behind
    quietly(() => deleteTap(entry.tapDevice));
    quietly(() => releaseIp(entry.octet));
    quietly(() => unlinkSync(entry.rootfsPath));
    quietly(() => unlinkSync(entry.socketPath));
    delete registry[agentName];
    cleaned.push(agentName);
  }

  // Scan for orphan firecracker processes not in agents.json
  try {
    const listing = execFileSync("pgrep", ["-a", "firecracker"], {
      encoding: "utf-8",
    });
    const rows = listing.trim().split("\n").filter((row) => row !== "");
    for (const row of rows) {
      const socketMatch = /agent-(\S+)\.sock/.exec(row);
      if (socketMatch === null) continue;

      const orphanName = socketMatch[1];
      if (registry[orphanName]) continue;

      const pid = parseInt(row.split(/\s+/)[0]);
      log(`Found orphan firecracker process for "${orphanName}" (PID ${pid}), killing...`);
      quietly(() => process.kill(pid, "SIGKILL"));
      cleaned.push(`orphan:${orphanName}`);
    }
  } catch {
    // pgrep found no firecracker processes — that's fine
  }

  saveAgents(registry);

  const nothingToDo = adopted.length === 0 && cleaned.length === 0;
  log(
    nothingToDo
      ? "No agents to reconcile."
      : `Reconciled: ${adopted.length} adopted, ${cleaned.length} cleaned.`
  );
  return { adopted, cleaned };
}
/**
 * Stops every registered agent, one after another.
 *
 * A failure to stop one agent no longer prevents the remaining agents from
 * being stopped: each failure is logged and collected, and a single summary
 * error is thrown once every agent has been attempted.
 *
 * Agents are stopped sequentially on purpose: `stopAgent` reads and rewrites
 * the shared registry file, so concurrent calls would overwrite each
 * other's updates.
 *
 * @throws Error listing the agents that could not be stopped, if any.
 */
export async function stopAllAgents() {
  const failures: string[] = [];

  for (const name of Object.keys(loadAgents())) {
    try {
      await stopAgent(name);
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err);
      log(`Failed to stop agent "${name}": ${reason}`);
      failures.push(`${name}: ${reason}`);
    }
  }

  if (failures.length > 0) {
    throw new Error(
      `Failed to stop ${failures.length} agent(s): ${failures.join("; ")}`
    );
  }
}