Initial commit — fireclaw multi-agent system
Firecracker microVM-based multi-agent system with IRC orchestration and local LLMs. Features: - Ephemeral command runner with VM snapshots (~1.1s) - Multi-agent orchestration via overseer IRC bot - 5 agent templates (worker, coder, researcher, quick, creative) - Tool access (shell + podman containers inside VMs) - Persistent workspace + memory system (MEMORY.md pattern) - Agent hot-reload (model/persona swap via SSH + SIGHUP) - Non-root agents, graceful shutdown, crash recovery - Agent-to-agent communication via IRC - DM support, /invite support - Systemd service, 20 regression tests Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
557
src/agent-manager.ts
Normal file
557
src/agent-manager.ts
Normal file
@@ -0,0 +1,557 @@
|
||||
import { spawn, type ChildProcess } from "node:child_process";
|
||||
import {
|
||||
existsSync,
|
||||
mkdirSync,
|
||||
readFileSync,
|
||||
writeFileSync,
|
||||
copyFileSync,
|
||||
unlinkSync,
|
||||
readdirSync,
|
||||
} from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { CONFIG } from "./config.js";
|
||||
import {
|
||||
ensureBridge,
|
||||
ensureNat,
|
||||
allocateIp,
|
||||
releaseIp,
|
||||
createTap,
|
||||
deleteTap,
|
||||
macFromOctet,
|
||||
} from "./network.js";
|
||||
import * as api from "./firecracker-api.js";
|
||||
|
||||
/** Runtime record for one running agent VM; persisted to agents.json. */
export interface AgentInfo {
  name: string;        // unique agent name (registry key in agents.json)
  nick: string;        // IRC nickname the agent connects with
  model: string;       // LLM model identifier the agent uses
  template: string;    // template name this agent was started from
  ip: string;          // guest IP on the fireclaw bridge network
  octet: number;       // final IP octet; also derives tap name and MAC
  tapDevice: string;   // host tap device backing the VM's NIC
  socketPath: string;  // Firecracker API unix socket for this VM
  rootfsPath: string;  // per-agent copy of the golden agent rootfs
  pid: number;         // host PID of the firecracker process
  startedAt: string;   // ISO-8601 start timestamp
}
|
||||
|
||||
/** Shape of a template JSON file in TEMPLATES_DIR (<name>.json). */
interface AgentTemplate {
  name: string;     // default agent name
  nick: string;     // default IRC nickname
  model: string;    // default LLM model
  trigger: string;  // trigger string passed into the guest config
  persona: string;  // persona markdown injected into the VM
}
|
||||
|
||||
// Persistent registry of running agents (name -> AgentInfo).
const AGENTS_FILE = join(CONFIG.baseDir, "agents.json");
// Directory holding agent template JSON files.
const TEMPLATES_DIR = join(CONFIG.baseDir, "templates");
// Golden agent rootfs image; copied per agent before boot.
const AGENT_ROOTFS = join(CONFIG.baseDir, "agent-rootfs.ext4");
// Per-agent persistent workspace images live here.
const WORKSPACES_DIR = CONFIG.workspacesDir;
|
||||
|
||||
function log(msg: string) {
|
||||
process.stderr.write(`[agent-mgr] ${msg}\n`);
|
||||
}
|
||||
|
||||
function loadAgents(): Record<string, AgentInfo> {
|
||||
try {
|
||||
return JSON.parse(readFileSync(AGENTS_FILE, "utf-8"));
|
||||
} catch {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
function saveAgents(agents: Record<string, AgentInfo>) {
|
||||
writeFileSync(AGENTS_FILE, JSON.stringify(agents, null, 2));
|
||||
}
|
||||
|
||||
export function loadTemplate(name: string): AgentTemplate {
|
||||
const path = join(TEMPLATES_DIR, `${name}.json`);
|
||||
if (!existsSync(path)) {
|
||||
throw new Error(`Template "${name}" not found at ${path}`);
|
||||
}
|
||||
return JSON.parse(readFileSync(path, "utf-8"));
|
||||
}
|
||||
|
||||
export function listTemplates(): string[] {
|
||||
try {
|
||||
return readdirSync(TEMPLATES_DIR)
|
||||
.filter((f) => f.endsWith(".json"))
|
||||
.map((f) => f.replace(".json", ""));
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
function injectAgentConfig(
|
||||
rootfsPath: string,
|
||||
config: { nick: string; model: string; trigger: string },
|
||||
persona: string
|
||||
) {
|
||||
const mountPoint = `/tmp/fireclaw-agent-${Date.now()}`;
|
||||
mkdirSync(mountPoint, { recursive: true });
|
||||
try {
|
||||
execFileSync("sudo", ["mount", "-o", "loop", rootfsPath, mountPoint], {
|
||||
stdio: "pipe",
|
||||
});
|
||||
execFileSync(
|
||||
"sudo",
|
||||
["mkdir", "-p", join(mountPoint, "etc/agent")],
|
||||
{ stdio: "pipe" }
|
||||
);
|
||||
|
||||
// Write config
|
||||
const configJson = JSON.stringify({
|
||||
nick: config.nick,
|
||||
model: config.model,
|
||||
trigger: config.trigger,
|
||||
server: "172.16.0.1",
|
||||
port: 6667,
|
||||
ollama_url: "http://172.16.0.1:11434",
|
||||
});
|
||||
execFileSync(
|
||||
"sudo",
|
||||
[
|
||||
"bash",
|
||||
"-c",
|
||||
`echo '${configJson}' > ${join(mountPoint, "etc/agent/config.json")}`,
|
||||
],
|
||||
{ stdio: "pipe" }
|
||||
);
|
||||
|
||||
// Write persona
|
||||
execFileSync(
|
||||
"sudo",
|
||||
[
|
||||
"bash",
|
||||
"-c",
|
||||
`cat > ${join(mountPoint, "etc/agent/persona.md")} << 'PERSONA_EOF'\n${persona}\nPERSONA_EOF`,
|
||||
],
|
||||
{ stdio: "pipe" }
|
||||
);
|
||||
|
||||
// Inject SSH key for debugging access
|
||||
execFileSync("sudo", ["mkdir", "-p", join(mountPoint, "root/.ssh")], {
|
||||
stdio: "pipe",
|
||||
});
|
||||
if (existsSync(CONFIG.sshPubKeyPath)) {
|
||||
execFileSync(
|
||||
"sudo",
|
||||
[
|
||||
"cp",
|
||||
CONFIG.sshPubKeyPath,
|
||||
join(mountPoint, "root/.ssh/authorized_keys"),
|
||||
],
|
||||
{ stdio: "pipe" }
|
||||
);
|
||||
execFileSync(
|
||||
"sudo",
|
||||
["chmod", "600", join(mountPoint, "root/.ssh/authorized_keys")],
|
||||
{ stdio: "pipe" }
|
||||
);
|
||||
}
|
||||
} finally {
|
||||
try {
|
||||
execFileSync("sudo", ["umount", mountPoint], { stdio: "pipe" });
|
||||
} catch {}
|
||||
try {
|
||||
execFileSync("rmdir", [mountPoint], { stdio: "pipe" });
|
||||
} catch {}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Ensure the agent's persistent workspace image exists, creating and
 * seeding it with the MEMORY.md pattern on first use.
 * @returns the path to the per-agent ext4 workspace image.
 */
function ensureWorkspace(agentName: string): string {
  mkdirSync(WORKSPACES_DIR, { recursive: true });
  const imgPath = join(WORKSPACES_DIR, `${agentName}.ext4`);

  if (!existsSync(imgPath)) {
    log(`Creating workspace for "${agentName}" (${CONFIG.workspaceSizeMib} MiB)...`);
    // Sparse file sized via truncate, then formatted as ext4 (needs root).
    execFileSync("truncate", ["-s", `${CONFIG.workspaceSizeMib}M`, imgPath], {
      stdio: "pipe",
    });
    execFileSync("sudo", ["/usr/sbin/mkfs.ext4", "-q", imgPath], {
      stdio: "pipe",
    });

    // Seed with MEMORY.md template
    const mountPoint = `/tmp/fireclaw-ws-${Date.now()}`;
    mkdirSync(mountPoint, { recursive: true });
    try {
      execFileSync("sudo", ["mount", "-o", "loop", imgPath, mountPoint], {
        stdio: "pipe",
      });
      // Create /memory and a starter MEMORY.md inside the image.
      execFileSync(
        "sudo",
        ["bash", "-c", `mkdir -p ${mountPoint}/memory && echo '# Agent Memory' > ${mountPoint}/MEMORY.md`],
        { stdio: "pipe" }
      );
      execFileSync("sudo", ["chown", "-R", "0:0", mountPoint], {
        stdio: "pipe",
      });
    } finally {
      // Best-effort cleanup even if seeding failed part-way.
      try { execFileSync("sudo", ["umount", mountPoint], { stdio: "pipe" }); } catch {}
      try { execFileSync("rmdir", [mountPoint], { stdio: "pipe" }); } catch {}
    }
  }

  return imgPath;
}
|
||||
|
||||
function waitForSocket(socketPath: string): Promise<void> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const deadline = Date.now() + 5_000;
|
||||
const check = () => {
|
||||
if (existsSync(socketPath)) {
|
||||
setTimeout(resolve, 200);
|
||||
return;
|
||||
}
|
||||
if (Date.now() > deadline) {
|
||||
reject(new Error("Firecracker socket did not appear"));
|
||||
return;
|
||||
}
|
||||
setTimeout(check, 50);
|
||||
};
|
||||
check();
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Start a new agent VM from a template.
 *
 * Sequence: copy the golden agent rootfs, inject config/persona into the
 * copy, ensure the agent's persistent workspace image, bring up
 * bridge/NAT/tap networking, boot a detached firecracker process over its
 * API socket, and register the agent in agents.json.
 *
 * @param templateName template JSON (in TEMPLATES_DIR) to start from
 * @param overrides optional agent name and/or LLM model overrides
 * @returns the registered AgentInfo
 * @throws if the golden rootfs is missing, the template does not exist,
 *   an agent with this name is already running, or no IP is free
 */
export async function startAgent(
  templateName: string,
  overrides?: { name?: string; model?: string }
): Promise<AgentInfo> {
  if (!existsSync(AGENT_ROOTFS)) {
    throw new Error(
      `Agent rootfs not found at ${AGENT_ROOTFS}. Build it first.`
    );
  }

  const template = loadTemplate(templateName);
  const name = overrides?.name ?? template.name;
  // NOTE(review): a name override also replaces the IRC nick (instead of
  // falling back to template.nick) — looks intentional so nick matches the
  // agent name; confirm.
  const nick = overrides?.name ?? template.nick;
  const model = overrides?.model ?? template.model;

  // Check not already running
  const agents = loadAgents();
  if (agents[name]) {
    throw new Error(`Agent "${name}" is already running`);
  }

  log(`Starting agent "${name}" (template: ${templateName})...`);

  // Allocate resources: guest IP, tap device named after the octet,
  // per-agent API socket, and a per-agent rootfs copy path.
  const { ip, octet } = allocateIp();
  const tapDevice = `fctap${octet}`;
  const socketPath = join(CONFIG.socketDir, `agent-${name}.sock`);
  const rootfsPath = join(CONFIG.runsDir, `agent-${name}.ext4`);

  mkdirSync(CONFIG.socketDir, { recursive: true });
  mkdirSync(CONFIG.runsDir, { recursive: true });

  // Prepare rootfs
  copyFileSync(AGENT_ROOTFS, rootfsPath);
  injectAgentConfig(
    rootfsPath,
    { nick, model, trigger: template.trigger },
    template.persona
  );

  // Create/get persistent workspace
  const workspacePath = ensureWorkspace(name);

  // Setup network
  ensureBridge();
  ensureNat();
  createTap(tapDevice);

  // Boot VM — firecracker is spawned detached + unref'd so it outlives
  // this process.
  const proc = spawn(
    CONFIG.firecrackerBin,
    ["--api-sock", socketPath],
    { stdio: "pipe", detached: true }
  );
  proc.unref();

  await waitForSocket(socketPath);

  // Kernel cmdline: `ip=` gives the guest a static address so no DHCP is
  // needed inside the VM.
  const bootArgs = [
    "console=ttyS0",
    "reboot=k",
    "panic=1",
    "pci=off",
    "root=/dev/vda",
    "rw",
    `ip=${ip}::${CONFIG.bridge.gateway}:${CONFIG.bridge.netmask}::eth0:off`,
  ].join(" ");

  // Configure and start the microVM via the Firecracker HTTP API.
  await api.putBootSource(socketPath, CONFIG.kernelPath, bootArgs);
  await api.putDrive(socketPath, "rootfs", rootfsPath);
  // Workspace is attached as a second, writable, non-root drive.
  await api.putDrive(socketPath, "workspace", workspacePath, false, false);
  await api.putNetworkInterface(
    socketPath,
    "eth0",
    tapDevice,
    macFromOctet(octet)
  );
  await api.putMachineConfig(
    socketPath,
    CONFIG.vm.vcpuCount,
    CONFIG.vm.memSizeMib
  );
  await api.startInstance(socketPath);

  const info: AgentInfo = {
    name,
    nick,
    model,
    template: templateName,
    ip,
    octet,
    tapDevice,
    socketPath,
    rootfsPath,
    pid: proc.pid!,
    startedAt: new Date().toISOString(),
  };

  // Register only after a successful boot.
  agents[name] = info;
  saveAgents(agents);

  log(`Agent "${name}" started: nick=${nick} ip=${ip}`);
  return info;
}
|
||||
|
||||
/**
 * Stop a running agent: best-effort graceful shutdown over SSH, SIGKILL
 * the firecracker process, wait for it to exit, then release the tap
 * device, IP octet, rootfs copy, and API socket, and deregister it.
 *
 * @throws if no agent with this name is in the registry
 */
export async function stopAgent(name: string) {
  const agents = loadAgents();
  const info = agents[name];
  if (!info) {
    throw new Error(`Agent "${name}" is not running`);
  }

  log(`Stopping agent "${name}"...`);

  // Graceful shutdown: SSH in and kill the agent process so it sends IRC QUIT
  try {
    execFileSync(
      "ssh",
      [
        "-o", "StrictHostKeyChecking=no",
        "-o", "UserKnownHostsFile=/dev/null",
        "-o", "ConnectTimeout=3",
        "-i", CONFIG.sshKeyPath,
        `root@${info.ip}`,
        "killall python3 2>/dev/null; sleep 1",
      ],
      { stdio: "pipe", timeout: 5_000 }
    );
  } catch {
    // Best effort — VM might already be unreachable
  }

  // Kill firecracker process and wait for it to die
  try {
    process.kill(info.pid, "SIGKILL");
    // Wait for process to actually exit before cleaning up resources
    // (poll with signal 0 — a liveness check — up to 20 * 200ms = 4s).
    for (let i = 0; i < 20; i++) {
      try {
        process.kill(info.pid, 0); // Check if alive
        await new Promise((r) => setTimeout(r, 200));
      } catch {
        break; // Process is gone
      }
    }
  } catch {
    // Already dead
  }

  // Small delay to let kernel release the tap device
  await new Promise((r) => setTimeout(r, 500));

  // Cleanup with retry for tap
  try {
    unlinkSync(info.socketPath);
  } catch {}
  for (let attempt = 0; attempt < 3; attempt++) {
    try {
      deleteTap(info.tapDevice);
      break;
    } catch {
      // Tap deletion can fail while the kernel still holds it; retry.
      if (attempt < 2) await new Promise((r) => setTimeout(r, 1000));
    }
  }
  releaseIp(info.octet);
  try {
    unlinkSync(info.rootfsPath);
  } catch {}

  // Deregister only after resources are released.
  delete agents[name];
  saveAgents(agents);
  log(`Agent "${name}" stopped.`);
}
|
||||
|
||||
/**
 * List registered agents, pruning entries whose firecracker process has
 * died. NOTE: listing has side effects — dead agents have their
 * tap/IP/rootfs/socket cleaned up and the registry is rewritten.
 */
export function listAgents(): AgentInfo[] {
  const agents = loadAgents();
  // Verify processes are still alive
  for (const [name, info] of Object.entries(agents)) {
    try {
      process.kill(info.pid, 0); // signal 0 = liveness probe, sends nothing
    } catch {
      // Process is dead, clean up
      log(`Agent "${name}" is dead, cleaning up...`);
      try {
        deleteTap(info.tapDevice);
      } catch {}
      try {
        releaseIp(info.octet);
      } catch {}
      try {
        unlinkSync(info.rootfsPath);
      } catch {}
      try {
        unlinkSync(info.socketPath);
      } catch {}
      delete agents[name];
    }
  }
  saveAgents(agents);
  return Object.values(agents);
}
|
||||
|
||||
/**
 * Hot-reload a running agent over SSH: merge model/trigger updates into
 * the guest's /etc/agent/config.json, optionally rewrite persona.md,
 * then SIGHUP the agent process so it re-reads both.
 *
 * @throws if the agent is not registered, or any SSH step fails
 */
export async function reloadAgent(
  name: string,
  updates: { model?: string; persona?: string; trigger?: string }
) {
  const agents = loadAgents();
  const info = agents[name];
  if (!info) {
    throw new Error(`Agent "${name}" is not running`);
  }

  log(`Reloading agent "${name}"...`);

  // Build updated config
  const configUpdates: Record<string, string> = {};
  if (updates.model) {
    configUpdates.model = updates.model;
    info.model = updates.model; // persisted locally by saveAgents below
  }
  // NOTE(review): trigger is pushed to the VM config but (unlike model)
  // is not mirrored anywhere locally — AgentInfo has no trigger field;
  // confirm that is intentional.
  if (updates.trigger) configUpdates.trigger = updates.trigger;

  // Write updated config as a temp file on the VM via SSH
  const sshOpts = [
    "-o", "StrictHostKeyChecking=no",
    "-o", "UserKnownHostsFile=/dev/null",
    "-o", "ConnectTimeout=5",
    "-i", CONFIG.sshKeyPath,
  ];
  const sshTarget = `root@${info.ip}`;

  try {
    if (Object.keys(configUpdates).length > 0) {
      // Read current config from VM
      const currentRaw = execFileSync(
        "ssh",
        [...sshOpts, sshTarget, "cat /etc/agent/config.json"],
        { encoding: "utf-8", timeout: 10_000 }
      );
      // Merge updates over the guest's current values.
      const current = JSON.parse(currentRaw);
      Object.assign(current, configUpdates);
      const newConfig = JSON.stringify(current);

      // Write back via stdin
      execFileSync(
        "ssh",
        [...sshOpts, sshTarget, `cat > /etc/agent/config.json`],
        { input: newConfig, timeout: 10_000 }
      );
    }

    if (updates.persona) {
      // Persona content is streamed on stdin, so it needs no shell quoting.
      execFileSync(
        "ssh",
        [...sshOpts, sshTarget, `cat > /etc/agent/persona.md`],
        { input: updates.persona, timeout: 10_000 }
      );
    }

    // Signal agent to reload
    execFileSync(
      "ssh",
      [...sshOpts, sshTarget, "killall -HUP python3"],
      { stdio: "pipe", timeout: 10_000 }
    );
  } catch (err) {
    throw new Error(`Failed to reload agent: ${err}`);
  }

  saveAgents(agents);
  log(`Agent "${name}" reloaded.`);
}
|
||||
|
||||
/**
 * Reconcile agents.json with reality (crash recovery / daemon restart):
 * adopt registry entries whose firecracker PID is still alive, clean up
 * the resources of dead ones, and SIGKILL orphan firecracker processes
 * that carry an agent socket path but have no registry entry.
 *
 * @returns names adopted, and names (or "orphan:<name>") cleaned
 */
export function reconcileAgents(): { adopted: string[]; cleaned: string[] } {
  const agents = loadAgents();
  const adopted: string[] = [];
  const cleaned: string[] = [];

  for (const [name, info] of Object.entries(agents)) {
    let alive = false;
    try {
      process.kill(info.pid, 0); // signal 0 = liveness probe, sends nothing
      alive = true;
    } catch {
      // Process is dead
    }

    if (alive) {
      adopted.push(name);
      log(`Adopted running agent "${name}" (PID ${info.pid}, ${info.ip})`);
    } else {
      log(`Cleaning dead agent "${name}" (PID ${info.pid} gone)...`);
      // Clean up resources from dead agent
      try { deleteTap(info.tapDevice); } catch {}
      try { releaseIp(info.octet); } catch {}
      try { unlinkSync(info.rootfsPath); } catch {}
      try { unlinkSync(info.socketPath); } catch {}
      delete agents[name];
      cleaned.push(name);
    }
  }

  // Scan for orphan firecracker processes not in agents.json
  try {
    const psOutput = execFileSync("pgrep", ["-a", "firecracker"], {
      encoding: "utf-8",
    });
    for (const line of psOutput.trim().split("\n")) {
      if (!line) continue;
      // pgrep -a prints "<pid> <cmdline>"; the --api-sock arg encodes the
      // agent name as agent-<name>.sock.
      const match = line.match(/agent-(\S+)\.sock/);
      if (match) {
        const agentName = match[1];
        if (!agents[agentName]) {
          const pid = parseInt(line.split(/\s+/)[0]);
          log(`Found orphan firecracker process for "${agentName}" (PID ${pid}), killing...`);
          try { process.kill(pid, "SIGKILL"); } catch {}
          cleaned.push(`orphan:${agentName}`);
        }
      }
    }
  } catch {
    // No firecracker processes running — that's fine (pgrep exits non-zero)
  }

  saveAgents(agents);

  if (adopted.length === 0 && cleaned.length === 0) {
    log("No agents to reconcile.");
  } else {
    log(`Reconciled: ${adopted.length} adopted, ${cleaned.length} cleaned.`);
  }

  return { adopted, cleaned };
}
|
||||
|
||||
export async function stopAllAgents() {
|
||||
const agents = loadAgents();
|
||||
for (const name of Object.keys(agents)) {
|
||||
await stopAgent(name);
|
||||
}
|
||||
}
|
||||
47
src/cleanup.ts
Normal file
47
src/cleanup.ts
Normal file
@@ -0,0 +1,47 @@
|
||||
import type { VMInstance } from "./vm.js";
|
||||
|
||||
// Every live VM that must be destroyed if this process exits abnormally.
const activeVms = new Set<VMInstance>();
||||
|
||||
export function registerVm(vm: VMInstance) {
|
||||
activeVms.add(vm);
|
||||
}
|
||||
|
||||
export function unregisterVm(vm: VMInstance) {
|
||||
activeVms.delete(vm);
|
||||
}
|
||||
|
||||
async function cleanupAll() {
|
||||
const vms = Array.from(activeVms);
|
||||
activeVms.clear();
|
||||
await Promise.allSettled(vms.map((vm) => vm.destroy()));
|
||||
}
|
||||
|
||||
// Guard so the handlers are installed at most once per process.
let registered = false;

/**
 * Install process-wide handlers (idempotent) that destroy all registered
 * VMs on SIGINT/SIGTERM, uncaught exceptions, and unhandled rejections,
 * then exit with a conventional code.
 */
export function installSignalHandlers() {
  if (registered) return;
  registered = true;

  const handler = async (signal: string) => {
    process.stderr.write(`\n[fireclaw] Caught ${signal}, cleaning up...\n`);
    await cleanupAll();
    // 128 + signal number convention: 130 = SIGINT, 143 = SIGTERM.
    process.exit(signal === "SIGINT" ? 130 : 143);
  };

  process.on("SIGINT", () => handler("SIGINT"));
  process.on("SIGTERM", () => handler("SIGTERM"));

  // Also clean up on crashes, then exit non-zero.
  process.on("uncaughtException", async (err) => {
    process.stderr.write(`[fireclaw] Uncaught exception: ${err.message}\n`);
    await cleanupAll();
    process.exit(1);
  });

  process.on("unhandledRejection", async (reason) => {
    process.stderr.write(
      `[fireclaw] Unhandled rejection: ${reason}\n`
    );
    await cleanupAll();
    process.exit(1);
  });
}
|
||||
134
src/cli.ts
Normal file
134
src/cli.ts
Normal file
@@ -0,0 +1,134 @@
|
||||
import { Command } from "commander";
|
||||
import { VMInstance } from "./vm.js";
|
||||
import { installSignalHandlers } from "./cleanup.js";
|
||||
import { runSetup } from "./setup.js";
|
||||
import { createSnapshot } from "./snapshot.js";
|
||||
import { runOverseer } from "./overseer.js";
|
||||
import {
|
||||
startAgent,
|
||||
stopAgent,
|
||||
listAgents,
|
||||
} from "./agent-manager.js";
|
||||
|
||||
/**
 * Build the fireclaw commander CLI: `run`, `setup`, `snapshot create`,
 * `overseer`, and the `agent start|stop|list` subcommands.
 * The caller invokes `.parse()` on the returned program.
 */
export function createCli() {
  const program = new Command();

  program
    .name("fireclaw")
    .description("Run commands in ephemeral Firecracker microVMs")
    .version("0.1.0");

  // One-shot ephemeral command execution in a fresh microVM.
  program
    .command("run")
    .description("Run a command inside a fresh microVM")
    .argument("<command>", "Command to execute inside the microVM")
    .option("-t, --timeout <seconds>", "Timeout in seconds", "60")
    .option("-v, --verbose", "Show detailed progress", false)
    .option("--mem <mib>", "Memory in MiB", "256")
    .option("--vcpu <count>", "Number of vCPUs", "1")
    .option("--no-snapshot", "Force cold boot, skip snapshot restore")
    .action(async (command: string, opts) => {
      installSignalHandlers();

      const result = await VMInstance.run(command, {
        // CLI takes seconds; VMInstance takes milliseconds.
        timeout: parseInt(opts.timeout) * 1000,
        verbose: opts.verbose,
        mem: parseInt(opts.mem),
        vcpu: parseInt(opts.vcpu),
        // commander maps --no-snapshot to opts.snapshot === false.
        noSnapshot: opts.snapshot === false,
      });

      // Relay captured output unless verbose (presumably verbose mode
      // already streamed it — TODO confirm in VMInstance).
      if (!opts.verbose) {
        if (result.stdout) process.stdout.write(result.stdout);
        if (result.stderr) process.stderr.write(result.stderr);
      }

      // Propagate the guest command's exit code to the shell.
      process.exit(result.exitCode);
    });

  // Host preparation (assets + networking).
  program
    .command("setup")
    .description("Download kernel, rootfs, and configure networking")
    .action(async () => {
      await runSetup();
    });

  const snapshot = program
    .command("snapshot")
    .description("Manage VM snapshots");

  snapshot
    .command("create")
    .description("Boot a VM and create a snapshot for fast restores")
    .action(async () => {
      installSignalHandlers();
      await createSnapshot();
    });

  // Overseer
  program
    .command("overseer")
    .description("Start the overseer daemon (IRC bot for agent management)")
    .option("--server <host>", "IRC server", "localhost")
    .option("--port <port>", "IRC port", "6667")
    .option("--nick <nick>", "Bot nickname", "overseer")
    .option("--channel <chan>", "Control channel", "#control")
    .action(async (opts) => {
      await runOverseer({
        server: opts.server,
        port: parseInt(opts.port),
        nick: opts.nick,
        channel: opts.channel,
      });
    });

  // Agent management
  const agent = program
    .command("agent")
    .description("Manage long-running agent VMs");

  agent
    .command("start")
    .description("Start an agent VM from a template")
    .argument("<template>", "Template name")
    .option("--name <name>", "Override agent name")
    .option("--model <model>", "Override LLM model")
    .action(async (template: string, opts) => {
      installSignalHandlers();
      const info = await startAgent(template, {
        name: opts.name,
        model: opts.model,
      });
      console.log(
        `Agent "${info.name}" started: ${info.nick} [${info.model}] (${info.ip})`
      );
      // Explicit exit — presumably the spawned VM's pipes would otherwise
      // keep the event loop alive; confirm against agent-manager.
      process.exit(0);
    });

  agent
    .command("stop")
    .description("Stop a running agent VM")
    .argument("<name>", "Agent name")
    .action(async (name: string) => {
      await stopAgent(name);
      console.log(`Agent "${name}" stopped.`);
    });

  agent
    .command("list")
    .description("List running agent VMs")
    .action(() => {
      const agents = listAgents();
      if (agents.length === 0) {
        console.log("No agents running.");
        return;
      }
      for (const a of agents) {
        console.log(
          `${a.name} (${a.template}) — ${a.nick} [${a.model}] ip=${a.ip} since ${a.startedAt}`
        );
      }
    });

  return program;
}
|
||||
57
src/config.ts
Normal file
57
src/config.ts
Normal file
@@ -0,0 +1,57 @@
|
||||
import { homedir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
|
||||
const HOME = homedir();

/** Global immutable configuration: host paths, network layout, VM sizing. */
export const CONFIG = {
  firecrackerBin: "/usr/local/bin/firecracker",
  baseDir: join(HOME, ".fireclaw"),
  kernelPath: join(HOME, ".fireclaw", "vmlinux"),
  baseRootfs: join(HOME, ".fireclaw", "base-rootfs.ext4"),
  runsDir: join(HOME, ".fireclaw", "runs"),                // per-run rootfs copies
  sshKeyPath: join(HOME, ".fireclaw", "id_ed25519"),       // private key for guest SSH
  sshPubKeyPath: join(HOME, ".fireclaw", "id_ed25519.pub"),
  socketDir: "/tmp/fireclaw",                              // Firecracker API sockets
  ipPoolFile: join(HOME, ".fireclaw", "ip-pool.json"),     // allocated guest octets
  ipPoolLock: join(HOME, ".fireclaw", "ip-pool.lock"),

  // Host bridge that guest tap devices attach to; guests are 172.16.0.<octet>.
  bridge: {
    name: "fcbr0",
    ip: "172.16.0.1",
    subnet: "172.16.0.0/24",
    netmask: "255.255.255.0",
    gateway: "172.16.0.1",
    prefix: "172.16.0",
    minHost: 2,    // .1 is the gateway
    maxHost: 254,
  },

  // Default VM shape and timing budgets.
  vm: {
    vcpuCount: 1,
    memSizeMib: 256,
    defaultTimeoutMs: 60_000,
    bootTimeoutMs: 15_000,
    sshPollIntervalMs: 100,
  },

  // Snapshot VM identity and asset paths.
  // NOTE(review): octet 200 lies inside the [minHost, maxHost] allocation
  // range — confirm the IP pool cannot hand it out concurrently.
  snapshot: {
    rootfsPath: join(HOME, ".fireclaw", "snapshot-rootfs.ext4"),
    statePath: join(HOME, ".fireclaw", "snapshot.state"),
    memPath: join(HOME, ".fireclaw", "snapshot.mem"),
    tapDevice: "fctap200",
    ip: "172.16.0.200",
    octet: 200,
  },

  // Persistent per-agent workspace images.
  workspacesDir: join(HOME, ".fireclaw", "workspaces"),
  workspaceSizeMib: 64,

  // S3 URLs for Firecracker CI assets
  assets: {
    kernelUrl:
      "https://s3.amazonaws.com/spec.ccfc.min/firecracker-ci/v1.11/x86_64/vmlinux-5.10.225",
    rootfsListUrl:
      "http://spec.ccfc.min.s3.amazonaws.com/?prefix=firecracker-ci/v1.11/x86_64/ubuntu",
    rootfsBaseUrl: "https://s3.amazonaws.com/spec.ccfc.min",
  },
} as const;
|
||||
152
src/firecracker-api.ts
Normal file
152
src/firecracker-api.ts
Normal file
@@ -0,0 +1,152 @@
|
||||
import http from "node:http";
|
||||
|
||||
function request(
|
||||
socketPath: string,
|
||||
method: string,
|
||||
path: string,
|
||||
body?: object
|
||||
): Promise<{ status: number; body: string }> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const payload = body ? JSON.stringify(body) : undefined;
|
||||
const headers: Record<string, string> = {};
|
||||
if (payload) {
|
||||
headers["Content-Type"] = "application/json";
|
||||
headers["Content-Length"] = Buffer.byteLength(payload).toString();
|
||||
}
|
||||
|
||||
const opts: http.RequestOptions = {
|
||||
socketPath,
|
||||
path,
|
||||
method,
|
||||
headers,
|
||||
};
|
||||
|
||||
const req = http.request(opts, (res) => {
|
||||
const chunks: Buffer[] = [];
|
||||
res.on("data", (chunk) => chunks.push(chunk));
|
||||
res.on("end", () => {
|
||||
resolve({
|
||||
status: res.statusCode ?? 0,
|
||||
body: Buffer.concat(chunks).toString(),
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
req.on("error", reject);
|
||||
|
||||
if (payload) {
|
||||
req.write(payload);
|
||||
}
|
||||
req.end();
|
||||
});
|
||||
}
|
||||
|
||||
function assertOk(res: { status: number; body: string }, action: string) {
|
||||
if (res.status < 200 || res.status >= 300) {
|
||||
throw new Error(
|
||||
`Firecracker API error (${action}): ${res.status} ${res.body}`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
export async function putBootSource(
|
||||
socketPath: string,
|
||||
kernelPath: string,
|
||||
bootArgs: string
|
||||
) {
|
||||
const res = await request(socketPath, "PUT", "/boot-source", {
|
||||
kernel_image_path: kernelPath,
|
||||
boot_args: bootArgs,
|
||||
});
|
||||
assertOk(res, "PUT /boot-source");
|
||||
}
|
||||
|
||||
export async function putDrive(
|
||||
socketPath: string,
|
||||
driveId: string,
|
||||
path: string,
|
||||
readOnly = false,
|
||||
isRoot = true
|
||||
) {
|
||||
const res = await request(socketPath, "PUT", `/drives/${driveId}`, {
|
||||
drive_id: driveId,
|
||||
path_on_host: path,
|
||||
is_root_device: isRoot,
|
||||
is_read_only: readOnly,
|
||||
});
|
||||
assertOk(res, `PUT /drives/${driveId}`);
|
||||
}
|
||||
|
||||
export async function putNetworkInterface(
|
||||
socketPath: string,
|
||||
ifaceId: string,
|
||||
hostDevName: string,
|
||||
guestMac: string
|
||||
) {
|
||||
const res = await request(
|
||||
socketPath,
|
||||
"PUT",
|
||||
`/network-interfaces/${ifaceId}`,
|
||||
{
|
||||
iface_id: ifaceId,
|
||||
guest_mac: guestMac,
|
||||
host_dev_name: hostDevName,
|
||||
}
|
||||
);
|
||||
assertOk(res, `PUT /network-interfaces/${ifaceId}`);
|
||||
}
|
||||
|
||||
export async function putMachineConfig(
|
||||
socketPath: string,
|
||||
vcpuCount: number,
|
||||
memSizeMib: number
|
||||
) {
|
||||
const res = await request(socketPath, "PUT", "/machine-config", {
|
||||
vcpu_count: vcpuCount,
|
||||
mem_size_mib: memSizeMib,
|
||||
});
|
||||
assertOk(res, "PUT /machine-config");
|
||||
}
|
||||
|
||||
export async function startInstance(socketPath: string) {
|
||||
const res = await request(socketPath, "PUT", "/actions", {
|
||||
action_type: "InstanceStart",
|
||||
});
|
||||
assertOk(res, "PUT /actions InstanceStart");
|
||||
}
|
||||
|
||||
export async function patchVm(
|
||||
socketPath: string,
|
||||
state: "Paused" | "Resumed"
|
||||
) {
|
||||
const res = await request(socketPath, "PATCH", "/vm", { state });
|
||||
assertOk(res, `PATCH /vm ${state}`);
|
||||
}
|
||||
|
||||
export async function putSnapshotCreate(
|
||||
socketPath: string,
|
||||
snapshotPath: string,
|
||||
memFilePath: string
|
||||
) {
|
||||
const res = await request(socketPath, "PUT", "/snapshot/create", {
|
||||
snapshot_type: "Full",
|
||||
snapshot_path: snapshotPath,
|
||||
mem_file_path: memFilePath,
|
||||
});
|
||||
assertOk(res, "PUT /snapshot/create");
|
||||
}
|
||||
|
||||
export async function putSnapshotLoad(
|
||||
socketPath: string,
|
||||
snapshotPath: string,
|
||||
memFilePath: string
|
||||
) {
|
||||
const res = await request(socketPath, "PUT", "/snapshot/load", {
|
||||
snapshot_path: snapshotPath,
|
||||
mem_backend: {
|
||||
backend_type: "File",
|
||||
backend_path: memFilePath,
|
||||
},
|
||||
});
|
||||
assertOk(res, "PUT /snapshot/load");
|
||||
}
|
||||
5
src/index.ts
Normal file
5
src/index.ts
Normal file
@@ -0,0 +1,5 @@
|
||||
#!/usr/bin/env node
// Executable entry point: build the commander CLI and parse process.argv.
import { createCli } from "./cli.js";

const program = createCli();
program.parse();
|
||||
14
src/irc-framework.d.ts
vendored
Normal file
14
src/irc-framework.d.ts
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
declare module "irc-framework" {
|
||||
class Client {
|
||||
connect(options: {
|
||||
host: string;
|
||||
port: number;
|
||||
nick: string;
|
||||
}): void;
|
||||
join(channel: string): void;
|
||||
say(target: string, message: string): void;
|
||||
quit(message?: string): void;
|
||||
on(event: string, handler: (...args: any[]) => void): void;
|
||||
}
|
||||
export default { Client };
|
||||
}
|
||||
165
src/network.ts
Normal file
165
src/network.ts
Normal file
@@ -0,0 +1,165 @@
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { openSync, closeSync, readFileSync, writeFileSync } from "node:fs";
|
||||
import { CONFIG } from "./config.js";
|
||||
|
||||
function run(cmd: string, args: string[]) {
|
||||
execFileSync(cmd, args, { stdio: "pipe" });
|
||||
}
|
||||
|
||||
function sudo(args: string[]) {
|
||||
run("sudo", args);
|
||||
}
|
||||
|
||||
/**
 * Create the fireclaw bridge with its gateway address and bring it up,
 * if it does not already exist; also enables IPv4 forwarding.
 */
export function ensureBridge() {
  try {
    // `ip link show <dev>` exits non-zero when the device is absent.
    execFileSync("ip", ["link", "show", CONFIG.bridge.name], {
      stdio: "pipe",
    });
  } catch {
    sudo(["ip", "link", "add", CONFIG.bridge.name, "type", "bridge"]);
    sudo([
      "ip",
      "addr",
      "add",
      `${CONFIG.bridge.ip}/24`,
      "dev",
      CONFIG.bridge.name,
    ]);
    sudo(["ip", "link", "set", CONFIG.bridge.name, "up"]);
    // NOTE(review): ip_forward is only enabled when the bridge is first
    // created; if something resets it later this function will not
    // re-apply it — confirm that is acceptable.
    sudo(["sysctl", "-w", "net.ipv4.ip_forward=1"]);
  }
}
|
||||
|
||||
/**
 * Ensure MASQUERADE NAT plus FORWARD rules exist so guests on the bridge
 * subnet can reach the outside world. Idempotent: skipped entirely when
 * the POSTROUTING rule already exists (`iptables -C`).
 */
export function ensureNat() {
  // Check if rule already exists
  try {
    execFileSync(
      "sudo",
      [
        "iptables",
        "-t",
        "nat",
        "-C",
        "POSTROUTING",
        "-s",
        CONFIG.bridge.subnet,
        "-j",
        "MASQUERADE",
      ],
      { stdio: "pipe" }
    );
  } catch {
    // Find the default route interface
    const routeOut = execFileSync("ip", ["route", "show", "default"], {
      encoding: "utf-8",
    });
    // NOTE(review): "eno2" is a machine-specific fallback; on hosts
    // without a default route this silently NATs out the wrong interface
    // — confirm or fail loudly instead.
    const extIface = routeOut.match(/dev\s+(\S+)/)?.[1] ?? "eno2";

    // Outbound NAT for the guest subnet.
    sudo([
      "iptables",
      "-t",
      "nat",
      "-A",
      "POSTROUTING",
      "-s",
      CONFIG.bridge.subnet,
      "-o",
      extIface,
      "-j",
      "MASQUERADE",
    ]);
    // Allow bridge -> uplink forwarding.
    sudo([
      "iptables",
      "-A",
      "FORWARD",
      "-i",
      CONFIG.bridge.name,
      "-o",
      extIface,
      "-j",
      "ACCEPT",
    ]);
    // Allow return traffic for established flows.
    sudo([
      "iptables",
      "-A",
      "FORWARD",
      "-i",
      extIface,
      "-o",
      CONFIG.bridge.name,
      "-m",
      "state",
      "--state",
      "RELATED,ESTABLISHED",
      "-j",
      "ACCEPT",
    ]);
  }
}
|
||||
|
||||
export function createTap(tapName: string) {
|
||||
sudo(["ip", "tuntap", "add", tapName, "mode", "tap"]);
|
||||
sudo(["ip", "link", "set", tapName, "master", CONFIG.bridge.name]);
|
||||
sudo(["ip", "link", "set", tapName, "up"]);
|
||||
}
|
||||
|
||||
export function deleteTap(tapName: string) {
|
||||
try {
|
||||
sudo(["ip", "tuntap", "del", tapName, "mode", "tap"]);
|
||||
} catch {
|
||||
// Already gone
|
||||
}
|
||||
}
|
||||
|
||||
export function macFromOctet(octet: number): string {
|
||||
return `AA:FC:00:00:00:${octet.toString(16).padStart(2, "0").toUpperCase()}`;
|
||||
}
|
||||
|
||||
// On-disk record of which host octets are currently handed out to VMs.
interface IpPool {
  // Final-octet values (e.g. 10 for x.y.z.10) currently in use.
  allocated: number[];
}
|
||||
|
||||
function readPool(): IpPool {
|
||||
try {
|
||||
return JSON.parse(readFileSync(CONFIG.ipPoolFile, "utf-8"));
|
||||
} catch {
|
||||
return { allocated: [] };
|
||||
}
|
||||
}
|
||||
|
||||
function writePool(pool: IpPool) {
|
||||
writeFileSync(CONFIG.ipPoolFile, JSON.stringify(pool));
|
||||
}
|
||||
|
||||
/**
 * Reserve the lowest free host octet in [minHost, maxHost] and return its
 * dotted IP together with the octet. Throws when the range is exhausted.
 *
 * NOTE(review): opening CONFIG.ipPoolLock does NOT take an advisory lock —
 * Node's openSync acquires no flock — so concurrent allocators can race on
 * the read-modify-write of the pool file. Confirm callers are serialized
 * elsewhere, or add real locking (e.g. an O_EXCL lockfile).
 */
export function allocateIp(): { ip: string; octet: number } {
  const fd = openSync(CONFIG.ipPoolLock, "w");
  try {
    // Scan for the first octet not yet handed out.
    const pool = readPool();
    for (
      let octet = CONFIG.bridge.minHost;
      octet <= CONFIG.bridge.maxHost;
      octet++
    ) {
      if (!pool.allocated.includes(octet)) {
        pool.allocated.push(octet);
        writePool(pool);
        return { ip: `${CONFIG.bridge.prefix}.${octet}`, octet };
      }
    }
    throw new Error("No free IPs in pool");
  } finally {
    closeSync(fd);
  }
}
|
||||
|
||||
/**
 * Return an octet to the pool so a future VM can reuse its IP.
 *
 * NOTE(review): same caveat as allocateIp — the "lock" file is opened but
 * never actually locked, so concurrent pool updates can race.
 */
export function releaseIp(octet: number) {
  const fd = openSync(CONFIG.ipPoolLock, "w");
  try {
    const pool = readPool();
    pool.allocated = pool.allocated.filter((o) => o !== octet);
    writePool(pool);
  } finally {
    closeSync(fd);
  }
}
|
||||
188
src/overseer.ts
Normal file
188
src/overseer.ts
Normal file
@@ -0,0 +1,188 @@
|
||||
import IRC from "irc-framework";
|
||||
import {
|
||||
startAgent,
|
||||
stopAgent,
|
||||
listAgents,
|
||||
stopAllAgents,
|
||||
listTemplates,
|
||||
reconcileAgents,
|
||||
reloadAgent,
|
||||
type AgentInfo,
|
||||
} from "./agent-manager.js";
|
||||
|
||||
// Connection settings for the overseer IRC bot.
interface OverseerConfig {
  server: string; // IRC server hostname
  port: number; // IRC server port
  nick: string; // nickname the overseer registers with
  channel: string; // primary channel to join and listen on
}
|
||||
|
||||
function log(msg: string) {
|
||||
process.stderr.write(`[overseer] ${msg}\n`);
|
||||
}
|
||||
|
||||
function formatAgentList(agents: AgentInfo[]): string[] {
|
||||
if (agents.length === 0) return ["No agents running."];
|
||||
return agents.map(
|
||||
(a) =>
|
||||
`${a.name} (${a.template}) — ${a.nick} [${a.model}] ip=${a.ip} since ${a.startedAt.slice(11, 19)}`
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Start the overseer IRC bot: reconcile persisted agent state, connect to
 * the configured server, join channels, and service `!` commands
 * (invoke/destroy/list/model/models/templates/help) from channel messages.
 * Installs SIGINT/SIGTERM handlers that stop every agent before exiting.
 * Returns after wiring is complete; the bot keeps running on the event loop.
 */
export async function runOverseer(config: OverseerConfig) {
  // Reconcile agent state on startup: adopt still-running agents, drop dead ones.
  log("Reconciling agent state...");
  const { adopted, cleaned } = reconcileAgents();
  if (adopted.length > 0) {
    log(`Adopted ${adopted.length} running agent(s): ${adopted.join(", ")}`);
  }
  if (cleaned.length > 0) {
    log(`Cleaned ${cleaned.length} dead agent(s): ${cleaned.join(", ")}`);
  }

  const bot = new IRC.Client();

  bot.connect({
    host: config.server,
    port: config.port,
    nick: config.nick,
  });

  bot.on("registered", () => {
    log(`Connected to ${config.server}:${config.port} as ${config.nick}`);
    bot.join(config.channel);
    // "#agents" is the shared agent-to-agent channel; joined unconditionally.
    bot.join("#agents");
    log(`Joined ${config.channel} and #agents`);
  });

  bot.on("message", async (event: { nick: string; target: string; message: string }) => {
    // Only handle channel messages (targets starting with "#"); DMs are ignored here.
    if (!event.target.startsWith("#")) return;

    const text = event.message.trim();
    if (!text.startsWith("!")) return;

    // parts[0] is the command, the rest are positional arguments.
    const parts = text.split(/\s+/);
    const cmd = parts[0].toLowerCase();

    try {
      switch (cmd) {
        // !invoke <template> [name] — start an agent from a template.
        case "!invoke": {
          const template = parts[1];
          if (!template) {
            bot.say(event.target, "Usage: !invoke <template> [name]");
            return;
          }
          const name = parts[2];
          bot.say(event.target, `Invoking agent "${name ?? template}" from template "${template}"...`);
          const info = await startAgent(template, { name });
          bot.say(
            event.target,
            `Agent "${info.name}" started: ${info.nick} [${info.model}] (${info.ip})`
          );
          break;
        }

        // !destroy <name> — stop a running agent.
        case "!destroy": {
          const name = parts[1];
          if (!name) {
            bot.say(event.target, "Usage: !destroy <name>");
            return;
          }
          await stopAgent(name);
          bot.say(event.target, `Agent "${name}" destroyed.`);
          break;
        }

        // !list — one status line per running agent.
        case "!list": {
          const agents = listAgents();
          for (const line of formatAgentList(agents)) {
            bot.say(event.target, line);
          }
          break;
        }

        // !model <name> <model> — hot-swap an agent's model.
        case "!model": {
          const name = parts[1];
          const model = parts[2];
          if (!name || !model) {
            bot.say(event.target, "Usage: !model <name> <model>");
            return;
          }
          await reloadAgent(name, { model });
          bot.say(event.target, `Agent "${name}" hot-reloaded with model ${model}.`);
          break;
        }

        // !templates — list available agent templates.
        case "!templates": {
          const templates = listTemplates();
          if (templates.length === 0) {
            bot.say(event.target, "No templates found.");
          } else {
            bot.say(event.target, `Templates: ${templates.join(", ")}`);
          }
          break;
        }

        // !models — query the local Ollama daemon for installed models.
        case "!models": {
          try {
            const http = await import("node:http");
            const data = await new Promise<string>((resolve, reject) => {
              http.get("http://localhost:11434/api/tags", (res) => {
                const chunks: Buffer[] = [];
                res.on("data", (c) => chunks.push(c));
                res.on("end", () => resolve(Buffer.concat(chunks).toString()));
              }).on("error", reject);
            });
            const models = JSON.parse(data).models;
            if (models.length === 0) {
              bot.say(event.target, "No models available.");
            } else {
              const lines = models.map(
                (m: { name: string; size: number }) =>
                  `${m.name} (${(m.size / 1e9).toFixed(1)}GB)`
              );
              bot.say(event.target, `Models: ${lines.join(", ")}`);
            }
          } catch (e) {
            // Ollama unreachable or returned malformed JSON.
            bot.say(event.target, "Error fetching models from Ollama.");
          }
          break;
        }

        case "!help": {
          bot.say(event.target, "Commands: !invoke <template> [name] | !destroy <name> | !list | !model <name> <model> | !models | !templates | !help");
          break;
        }
      }
    } catch (err) {
      // Report command failures back to the channel rather than crashing the bot.
      const msg = err instanceof Error ? err.message : String(err);
      bot.say(event.target, `Error: ${msg}`);
      log(`Error handling command "${text}": ${msg}`);
    }
  });

  // Reconnect with a fixed 5s backoff whenever the connection drops.
  bot.on("close", () => {
    log("Disconnected. Reconnecting in 5s...");
    setTimeout(() => {
      bot.connect({
        host: config.server,
        port: config.port,
        nick: config.nick,
      });
    }, 5000);
  });

  // Graceful shutdown: stop every agent, part from IRC, then exit.
  const shutdown = async () => {
    log("Shutting down, stopping all agents...");
    await stopAllAgents();
    bot.quit("Overseer shutting down");
    process.exit(0);
  };

  process.on("SIGINT", shutdown);
  process.on("SIGTERM", shutdown);

  log("Overseer started. Waiting for commands...");
}
|
||||
98
src/rootfs.ts
Normal file
98
src/rootfs.ts
Normal file
@@ -0,0 +1,98 @@
|
||||
import { execFileSync } from "node:child_process";
|
||||
import {
|
||||
existsSync,
|
||||
copyFileSync,
|
||||
mkdirSync,
|
||||
unlinkSync,
|
||||
} from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { randomBytes } from "node:crypto";
|
||||
import { CONFIG } from "./config.js";
|
||||
|
||||
export function ensureBaseImage() {
|
||||
if (!existsSync(CONFIG.baseRootfs)) {
|
||||
throw new Error(
|
||||
`Base rootfs not found at ${CONFIG.baseRootfs}. Run 'fireclaw setup' first.`
|
||||
);
|
||||
}
|
||||
if (!existsSync(CONFIG.kernelPath)) {
|
||||
throw new Error(
|
||||
`Kernel not found at ${CONFIG.kernelPath}. Run 'fireclaw setup' first.`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
export function ensureSshKeypair() {
|
||||
if (!existsSync(CONFIG.sshKeyPath)) {
|
||||
execFileSync("ssh-keygen", [
|
||||
"-t",
|
||||
"ed25519",
|
||||
"-f",
|
||||
CONFIG.sshKeyPath,
|
||||
"-N",
|
||||
"",
|
||||
"-C",
|
||||
"fireclaw",
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
export function createRunCopy(vmId: string): string {
|
||||
mkdirSync(CONFIG.runsDir, { recursive: true });
|
||||
const dest = join(CONFIG.runsDir, `${vmId}.ext4`);
|
||||
copyFileSync(CONFIG.baseRootfs, dest);
|
||||
return dest;
|
||||
}
|
||||
|
||||
/**
 * Loop-mount an ext4 rootfs image and install the fireclaw public key as
 * root's authorized_keys, tightening permissions (700 on ~/.ssh, 600 on the
 * key file) so sshd will accept them.
 *
 * The mount point is a random /tmp directory; unmount and removal run
 * best-effort in the finally block so a failed copy never leaks a mount.
 */
export function injectSshKey(rootfsPath: string) {
  const mountPoint = `/tmp/fireclaw-mount-${randomBytes(4).toString("hex")}`;
  mkdirSync(mountPoint, { recursive: true });

  try {
    // Loop-mount the image so we can write into its filesystem.
    execFileSync("sudo", ["mount", "-o", "loop", rootfsPath, mountPoint], {
      stdio: "pipe",
    });

    execFileSync("sudo", ["mkdir", "-p", join(mountPoint, "root/.ssh")], {
      stdio: "pipe",
    });
    execFileSync(
      "sudo",
      [
        "cp",
        CONFIG.sshPubKeyPath,
        join(mountPoint, "root/.ssh/authorized_keys"),
      ],
      { stdio: "pipe" }
    );
    // sshd rejects keys with permissive modes; tighten file then directory.
    execFileSync(
      "sudo",
      ["chmod", "600", join(mountPoint, "root/.ssh/authorized_keys")],
      { stdio: "pipe" }
    );
    execFileSync(
      "sudo",
      ["chmod", "700", join(mountPoint, "root/.ssh")],
      { stdio: "pipe" }
    );
  } finally {
    // Best effort: unmount first, then remove the temporary mount point.
    try {
      execFileSync("sudo", ["umount", mountPoint], { stdio: "pipe" });
    } catch {
      // Best effort
    }
    try {
      execFileSync("rmdir", [mountPoint], { stdio: "pipe" });
    } catch {
      // Best effort
    }
  }
}
|
||||
|
||||
export function deleteRunCopy(rootfsPath: string) {
|
||||
try {
|
||||
unlinkSync(rootfsPath);
|
||||
} catch {
|
||||
// Already gone
|
||||
}
|
||||
}
|
||||
117
src/setup.ts
Normal file
117
src/setup.ts
Normal file
@@ -0,0 +1,117 @@
|
||||
import { execFileSync, execSync } from "node:child_process";
|
||||
import { existsSync, mkdirSync } from "node:fs";
|
||||
import { CONFIG } from "./config.js";
|
||||
import { ensureBridge, ensureNat } from "./network.js";
|
||||
import { ensureSshKeypair } from "./rootfs.js";
|
||||
|
||||
function log(msg: string) {
|
||||
process.stderr.write(`[setup] ${msg}\n`);
|
||||
}
|
||||
|
||||
function download(url: string, dest: string) {
|
||||
execFileSync("curl", ["-fSL", "-o", dest, url], {
|
||||
stdio: ["pipe", "pipe", "inherit"],
|
||||
timeout: 300_000,
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * One-time host setup: create working directories, download the kernel and
 * base rootfs (converting squashfs -> ext4 via loop mounts), generate the
 * SSH keypair, and configure the bridge + NAT. Each step is idempotent —
 * already-present artifacts are skipped. Requires sudo for mount/mkfs work.
 */
export async function runSetup() {
  log("Setting up fireclaw...");

  // Create directories
  mkdirSync(CONFIG.baseDir, { recursive: true });
  mkdirSync(CONFIG.runsDir, { recursive: true });
  mkdirSync(CONFIG.socketDir, { recursive: true });

  // Download kernel
  if (existsSync(CONFIG.kernelPath)) {
    log("Kernel already exists, skipping download.");
  } else {
    log("Downloading kernel...");
    download(CONFIG.assets.kernelUrl, CONFIG.kernelPath);
    log("Kernel downloaded.");
  }

  // Download and convert rootfs
  if (existsSync(CONFIG.baseRootfs)) {
    log("Base rootfs already exists, skipping download.");
  } else {
    log("Downloading rootfs...");

    // Find the latest rootfs key from the S3 bucket listing XML
    // (lexicographic sort; the last key is taken as newest).
    const listing = execFileSync(
      "curl",
      ["-fsSL", CONFIG.assets.rootfsListUrl],
      { encoding: "utf-8", timeout: 30_000 }
    );
    const keys = [...listing.matchAll(/<Key>([^<]+)<\/Key>/g)].map(
      (m) => m[1]
    );
    const rootfsKey = keys.sort().pop();
    if (!rootfsKey) throw new Error("Could not find rootfs in S3 listing");

    const squashfsPath = `${CONFIG.baseDir}/rootfs.squashfs`;
    download(`${CONFIG.assets.rootfsBaseUrl}/${rootfsKey}`, squashfsPath);
    log("Rootfs downloaded. Converting squashfs to ext4...");

    // Convert squashfs to ext4: mount both, copy contents across.
    const squashMount = "/tmp/fireclaw-squash";
    const ext4Mount = "/tmp/fireclaw-ext4";
    mkdirSync(squashMount, { recursive: true });
    mkdirSync(ext4Mount, { recursive: true });

    try {
      execFileSync(
        "sudo",
        ["mount", "-t", "squashfs", squashfsPath, squashMount],
        { stdio: "pipe" }
      );
      // Create a sparse 1G file and format it as ext4 for the writable image.
      execFileSync("truncate", ["-s", "1G", CONFIG.baseRootfs], {
        stdio: "pipe",
      });
      execFileSync("sudo", ["/usr/sbin/mkfs.ext4", CONFIG.baseRootfs], {
        stdio: "pipe",
      });
      execFileSync("sudo", ["mount", CONFIG.baseRootfs, ext4Mount], {
        stdio: "pipe",
      });
      // "-a" preserves ownership/permissions; "/." copies directory contents.
      execFileSync("sudo", ["cp", "-a", `${squashMount}/.`, ext4Mount], {
        stdio: "pipe",
      });

      // Bake in DNS config so guests can resolve names immediately.
      execSync(
        `echo "nameserver 8.8.8.8" | sudo tee ${ext4Mount}/etc/resolv.conf > /dev/null`
      );

      log("Rootfs converted.");
    } finally {
      // Best-effort teardown of mounts, mount points, and the squashfs blob.
      try {
        execFileSync("sudo", ["umount", squashMount], { stdio: "pipe" });
      } catch {}
      try {
        execFileSync("sudo", ["umount", ext4Mount], { stdio: "pipe" });
      } catch {}
      try {
        execFileSync("rmdir", [squashMount], { stdio: "pipe" });
      } catch {}
      try {
        execFileSync("rmdir", [ext4Mount], { stdio: "pipe" });
      } catch {}
      try {
        execFileSync("rm", ["-f", squashfsPath], { stdio: "pipe" });
      } catch {}
    }
  }

  // Generate SSH keypair
  log("Ensuring SSH keypair...");
  ensureSshKeypair();

  // Set up bridge and NAT
  log("Setting up network bridge...");
  ensureBridge();
  ensureNat();

  log("Setup complete! Run 'fireclaw run \"uname -a\"' to test.");
}
|
||||
128
src/snapshot.ts
Normal file
128
src/snapshot.ts
Normal file
@@ -0,0 +1,128 @@
|
||||
import { spawn, type ChildProcess } from "node:child_process";
|
||||
import { existsSync, mkdirSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { CONFIG } from "./config.js";
|
||||
import * as api from "./firecracker-api.js";
|
||||
import {
|
||||
ensureBridge,
|
||||
ensureNat,
|
||||
createTap,
|
||||
deleteTap,
|
||||
macFromOctet,
|
||||
} from "./network.js";
|
||||
import {
|
||||
ensureBaseImage,
|
||||
ensureSshKeypair,
|
||||
createRunCopy,
|
||||
injectSshKey,
|
||||
} from "./rootfs.js";
|
||||
import { waitForSsh } from "./ssh.js";
|
||||
import { copyFileSync } from "node:fs";
|
||||
|
||||
function log(msg: string) {
|
||||
process.stderr.write(`[snapshot] ${msg}\n`);
|
||||
}
|
||||
|
||||
function waitForSocket(socketPath: string): Promise<void> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const deadline = Date.now() + 5_000;
|
||||
const check = () => {
|
||||
if (existsSync(socketPath)) {
|
||||
setTimeout(resolve, 200);
|
||||
return;
|
||||
}
|
||||
if (Date.now() > deadline) {
|
||||
reject(new Error("Firecracker socket did not appear"));
|
||||
return;
|
||||
}
|
||||
setTimeout(check, 50);
|
||||
};
|
||||
check();
|
||||
});
|
||||
}
|
||||
|
||||
export function snapshotExists(): boolean {
|
||||
return (
|
||||
existsSync(CONFIG.snapshot.statePath) &&
|
||||
existsSync(CONFIG.snapshot.memPath) &&
|
||||
existsSync(CONFIG.snapshot.rootfsPath)
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Boot a VM from the base image, wait until sshd answers, pause it, and
 * write a Firecracker snapshot (state + memory + rootfs) so later runs can
 * resume in ~1s instead of cold-booting. The finally block kills the
 * firecracker process, removes the API socket, and deletes the tap device.
 */
export async function createSnapshot() {
  ensureBaseImage();
  ensureSshKeypair();

  const snap = CONFIG.snapshot;
  const socketPath = join(CONFIG.socketDir, "snapshot.sock");

  // The snapshot rootfs is a dedicated copy with the SSH key pre-injected.
  log("Preparing snapshot rootfs...");
  mkdirSync(CONFIG.socketDir, { recursive: true });
  copyFileSync(CONFIG.baseRootfs, snap.rootfsPath);
  injectSshKey(snap.rootfsPath);

  log("Setting up network...");
  ensureBridge();
  ensureNat();
  createTap(snap.tapDevice);

  let proc: ChildProcess | null = null;

  try {
    log("Booting VM for snapshot...");
    proc = spawn(CONFIG.firecrackerBin, ["--api-sock", socketPath], {
      stdio: "pipe",
      detached: false,
    });

    await waitForSocket(socketPath);

    // Kernel command line: serial console, static guest IP on eth0.
    const bootArgs = [
      "console=ttyS0",
      "reboot=k",
      "panic=1",
      "pci=off",
      "root=/dev/vda",
      "rw",
      `ip=${snap.ip}::${CONFIG.bridge.gateway}:${CONFIG.bridge.netmask}::eth0:off`,
    ].join(" ");

    // Configure the VM through the Firecracker API, then start it.
    await api.putBootSource(socketPath, CONFIG.kernelPath, bootArgs);
    await api.putDrive(socketPath, "rootfs", snap.rootfsPath);
    await api.putNetworkInterface(
      socketPath,
      "eth0",
      snap.tapDevice,
      macFromOctet(snap.octet)
    );
    await api.putMachineConfig(
      socketPath,
      CONFIG.vm.vcpuCount,
      CONFIG.vm.memSizeMib
    );
    await api.startInstance(socketPath);

    log("Waiting for SSH...");
    await waitForSsh(snap.ip);

    // The VM must be paused before a snapshot can be taken.
    log("Pausing VM...");
    await api.patchVm(socketPath, "Paused");

    log("Creating snapshot...");
    await api.putSnapshotCreate(socketPath, snap.statePath, snap.memPath);

    log("Snapshot created successfully.");
    log(`  State: ${snap.statePath}`);
    log(`  Memory: ${snap.memPath}`);
    log(`  Rootfs: ${snap.rootfsPath}`);
  } finally {
    // The paused VM is no longer needed; hard-kill is intentional here.
    if (proc && !proc.killed) {
      proc.kill("SIGKILL");
    }
    try {
      const { unlinkSync } = await import("node:fs");
      unlinkSync(socketPath);
    } catch {}
    deleteTap(snap.tapDevice);
  }
}
|
||||
107
src/ssh.ts
Normal file
107
src/ssh.ts
Normal file
@@ -0,0 +1,107 @@
|
||||
import { Client } from "ssh2";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { createConnection } from "node:net";
|
||||
import { CONFIG } from "./config.js";
|
||||
import type { RunResult } from "./types.js";
|
||||
|
||||
/**
 * Resolve once a TCP connection to host:port succeeds — used as a proxy for
 * "sshd is accepting connections". Retries every sshPollIntervalMs with a
 * 500ms per-attempt connect timeout; rejects when the overall deadline
 * (bootTimeoutMs by default) is exceeded.
 */
export function waitForSsh(
  host: string,
  port = 22,
  timeoutMs = CONFIG.vm.bootTimeoutMs
): Promise<void> {
  return new Promise((resolve, reject) => {
    const deadline = Date.now() + timeoutMs;

    function attempt() {
      // Deadline is checked before each attempt, not mid-connect.
      if (Date.now() > deadline) {
        reject(new Error(`SSH not ready after ${timeoutMs}ms`));
        return;
      }

      const sock = createConnection({ host, port, timeout: 500 });

      sock.on("connect", () => {
        // Port is open — we only needed reachability, so drop the socket.
        sock.destroy();
        resolve();
      });

      sock.on("error", () => {
        sock.destroy();
        setTimeout(attempt, CONFIG.vm.sshPollIntervalMs);
      });

      sock.on("timeout", () => {
        sock.destroy();
        setTimeout(attempt, CONFIG.vm.sshPollIntervalMs);
      });
    }

    attempt();
  });
}
|
||||
|
||||
/**
 * Run a shell command on the guest over SSH as root (key auth) and collect
 * exit code, stdout, stderr, and wall-clock duration. When `verbose`, the
 * streams are also mirrored to the local stdout/stderr as they arrive.
 *
 * On timeout the connection is ended gracefully and the promise rejects;
 * a later stream "close" settling attempt is a no-op (promises ignore
 * settles after the first).
 *
 * NOTE(review): hostVerifier accepts any host key — fine for ephemeral VMs
 * on a private bridge, but confirm this never targets external hosts.
 */
export function execCommand(
  host: string,
  command: string,
  timeoutMs: number,
  verbose: boolean
): Promise<RunResult> {
  return new Promise((resolve, reject) => {
    const startTime = Date.now();
    const privateKey = readFileSync(CONFIG.sshKeyPath);
    const conn = new Client();

    // Overall watchdog covering connect + exec + stream drain.
    const timer = setTimeout(() => {
      conn.end();
      reject(new Error(`Command timed out after ${timeoutMs}ms`));
    }, timeoutMs);

    conn.on("ready", () => {
      conn.exec(command, (err, stream) => {
        if (err) {
          clearTimeout(timer);
          conn.end();
          reject(err);
          return;
        }

        const stdoutChunks: Buffer[] = [];
        const stderrChunks: Buffer[] = [];

        stream.on("data", (data: Buffer) => {
          stdoutChunks.push(data);
          if (verbose) process.stdout.write(data);
        });

        stream.stderr.on("data", (data: Buffer) => {
          stderrChunks.push(data);
          if (verbose) process.stderr.write(data);
        });

        // "close" delivers the remote exit code; null maps to failure (1).
        stream.on("close", (code: number | null) => {
          clearTimeout(timer);
          conn.end();
          resolve({
            exitCode: code ?? 1,
            stdout: Buffer.concat(stdoutChunks).toString(),
            stderr: Buffer.concat(stderrChunks).toString(),
            durationMs: Date.now() - startTime,
          });
        });
      });
    });

    conn.on("error", (err) => {
      clearTimeout(timer);
      reject(err);
    });

    conn.connect({
      host,
      port: 22,
      username: "root",
      privateKey,
      hostVerifier: () => true,
    });
  });
}
|
||||
24
src/types.ts
Normal file
24
src/types.ts
Normal file
@@ -0,0 +1,24 @@
|
||||
// Per-run settings for a single microVM.
export interface VMConfig {
  id: string; // unique run identifier, used for socket/rootfs names
  guestIp: string; // static IP assigned to the guest's eth0
  tapDevice: string; // host tap device bridged to the guest
  socketPath: string; // Firecracker API unix socket path
  rootfsPath: string; // per-run rootfs copy ("" for shared snapshot rootfs)
  timeoutMs: number; // command execution timeout
  verbose: boolean; // mirror guest output / emit progress logs
}
|
||||
|
||||
// Outcome of one command executed inside a VM.
export interface RunResult {
  exitCode: number; // remote process exit code (1 if unknown)
  stdout: string; // captured standard output
  stderr: string; // captured standard error
  durationMs: number; // wall-clock time from connect to stream close
}
|
||||
|
||||
// Caller-tunable options for VMInstance.run; all optional.
export interface RunOptions {
  timeout?: number; // command timeout in ms (default CONFIG.vm.defaultTimeoutMs)
  verbose?: boolean; // stream output and progress logs
  mem?: number; // guest memory in MiB (cold boot only)
  vcpu?: number; // guest vCPU count (cold boot only)
  noSnapshot?: boolean; // force a cold boot even when a snapshot exists
}
|
||||
288
src/vm.ts
Normal file
288
src/vm.ts
Normal file
@@ -0,0 +1,288 @@
|
||||
import { spawn, type ChildProcess } from "node:child_process";
|
||||
import { existsSync, mkdirSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { randomBytes } from "node:crypto";
|
||||
import { CONFIG } from "./config.js";
|
||||
import type { VMConfig, RunResult, RunOptions } from "./types.js";
|
||||
import * as api from "./firecracker-api.js";
|
||||
import {
|
||||
ensureBridge,
|
||||
ensureNat,
|
||||
allocateIp,
|
||||
releaseIp,
|
||||
createTap,
|
||||
deleteTap,
|
||||
macFromOctet,
|
||||
} from "./network.js";
|
||||
import {
|
||||
ensureBaseImage,
|
||||
ensureSshKeypair,
|
||||
createRunCopy,
|
||||
injectSshKey,
|
||||
deleteRunCopy,
|
||||
} from "./rootfs.js";
|
||||
import { waitForSsh, execCommand } from "./ssh.js";
|
||||
import { registerVm, unregisterVm } from "./cleanup.js";
|
||||
import { snapshotExists } from "./snapshot.js";
|
||||
|
||||
function log(verbose: boolean, msg: string) {
|
||||
if (verbose) process.stderr.write(`[fireclaw] ${msg}\n`);
|
||||
}
|
||||
|
||||
export class VMInstance {
|
||||
private config: VMConfig;
|
||||
private process: ChildProcess | null = null;
|
||||
private octet = 0;
|
||||
|
||||
constructor(config: VMConfig) {
|
||||
this.config = config;
|
||||
}
|
||||
|
||||
static async run(
|
||||
command: string,
|
||||
opts: RunOptions = {}
|
||||
): Promise<RunResult> {
|
||||
// Try snapshot path first unless disabled
|
||||
if (!opts.noSnapshot && snapshotExists()) {
|
||||
return VMInstance.runFromSnapshot(command, opts);
|
||||
}
|
||||
return VMInstance.runColdBoot(command, opts);
|
||||
}
|
||||
|
||||
private static async runFromSnapshot(
|
||||
command: string,
|
||||
opts: RunOptions
|
||||
): Promise<RunResult> {
|
||||
const id = `fc-snap-${randomBytes(3).toString("hex")}`;
|
||||
const verbose = opts.verbose ?? false;
|
||||
const timeoutMs = opts.timeout ?? CONFIG.vm.defaultTimeoutMs;
|
||||
const snap = CONFIG.snapshot;
|
||||
|
||||
mkdirSync(CONFIG.socketDir, { recursive: true });
|
||||
|
||||
const config: VMConfig = {
|
||||
id,
|
||||
guestIp: snap.ip,
|
||||
tapDevice: snap.tapDevice,
|
||||
socketPath: join(CONFIG.socketDir, `${id}.sock`),
|
||||
rootfsPath: "", // shared, not per-run
|
||||
timeoutMs,
|
||||
verbose,
|
||||
};
|
||||
|
||||
const vm = new VMInstance(config);
|
||||
vm.octet = 0; // no IP pool allocation for snapshot runs
|
||||
registerVm(vm);
|
||||
|
||||
try {
|
||||
log(verbose, `VM ${id}: restoring from snapshot...`);
|
||||
ensureBridge();
|
||||
ensureNat();
|
||||
createTap(snap.tapDevice);
|
||||
|
||||
// Spawn firecracker and load snapshot
|
||||
vm.process = spawn(
|
||||
CONFIG.firecrackerBin,
|
||||
["--api-sock", config.socketPath],
|
||||
{ stdio: "pipe", detached: false }
|
||||
);
|
||||
vm.process.on("error", (err) => {
|
||||
log(verbose, `Firecracker process error: ${err.message}`);
|
||||
});
|
||||
|
||||
await vm.waitForSocket();
|
||||
await api.putSnapshotLoad(
|
||||
config.socketPath,
|
||||
snap.statePath,
|
||||
snap.memPath
|
||||
);
|
||||
await api.patchVm(config.socketPath, "Resumed");
|
||||
|
||||
log(verbose, `VM ${id}: resumed, waiting for SSH...`);
|
||||
await waitForSsh(snap.ip);
|
||||
|
||||
log(verbose, `VM ${id}: executing command...`);
|
||||
const result = await execCommand(snap.ip, command, timeoutMs, verbose);
|
||||
|
||||
log(
|
||||
verbose,
|
||||
`VM ${id}: done (exit=${result.exitCode}, ${result.durationMs}ms)`
|
||||
);
|
||||
return result;
|
||||
} finally {
|
||||
await vm.destroy();
|
||||
unregisterVm(vm);
|
||||
}
|
||||
}
|
||||
|
||||
private static async runColdBoot(
|
||||
command: string,
|
||||
opts: RunOptions
|
||||
): Promise<RunResult> {
|
||||
const id = `fc-${randomBytes(3).toString("hex")}`;
|
||||
const verbose = opts.verbose ?? false;
|
||||
const timeoutMs = opts.timeout ?? CONFIG.vm.defaultTimeoutMs;
|
||||
|
||||
// Pre-flight checks
|
||||
ensureBaseImage();
|
||||
ensureSshKeypair();
|
||||
|
||||
// Allocate resources
|
||||
const { ip, octet } = allocateIp();
|
||||
const tapDevice = `fctap${octet}`;
|
||||
|
||||
mkdirSync(CONFIG.socketDir, { recursive: true });
|
||||
|
||||
const config: VMConfig = {
|
||||
id,
|
||||
guestIp: ip,
|
||||
tapDevice,
|
||||
socketPath: join(CONFIG.socketDir, `${id}.sock`),
|
||||
rootfsPath: "",
|
||||
timeoutMs,
|
||||
verbose,
|
||||
};
|
||||
|
||||
const vm = new VMInstance(config);
|
||||
vm.octet = octet;
|
||||
registerVm(vm);
|
||||
|
||||
try {
|
||||
log(verbose, `VM ${id}: preparing rootfs...`);
|
||||
config.rootfsPath = createRunCopy(id);
|
||||
injectSshKey(config.rootfsPath);
|
||||
|
||||
log(verbose, `VM ${id}: creating tap ${tapDevice}...`);
|
||||
ensureBridge();
|
||||
ensureNat();
|
||||
createTap(tapDevice);
|
||||
|
||||
log(verbose, `VM ${id}: booting...`);
|
||||
await vm.boot(opts);
|
||||
|
||||
log(verbose, `VM ${id}: waiting for SSH at ${ip}...`);
|
||||
await waitForSsh(ip);
|
||||
|
||||
log(verbose, `VM ${id}: executing command...`);
|
||||
const result = await execCommand(ip, command, timeoutMs, verbose);
|
||||
|
||||
log(
|
||||
verbose,
|
||||
`VM ${id}: done (exit=${result.exitCode}, ${result.durationMs}ms)`
|
||||
);
|
||||
return result;
|
||||
} finally {
|
||||
await vm.destroy();
|
||||
unregisterVm(vm);
|
||||
}
|
||||
}
|
||||
|
||||
private async boot(opts: RunOptions) {
|
||||
const { config } = this;
|
||||
const vcpu = opts.vcpu ?? CONFIG.vm.vcpuCount;
|
||||
const mem = opts.mem ?? CONFIG.vm.memSizeMib;
|
||||
|
||||
// Spawn firecracker
|
||||
this.process = spawn(
|
||||
CONFIG.firecrackerBin,
|
||||
["--api-sock", config.socketPath],
|
||||
{
|
||||
stdio: "pipe",
|
||||
detached: false,
|
||||
}
|
||||
);
|
||||
|
||||
this.process.on("error", (err) => {
|
||||
log(config.verbose, `Firecracker process error: ${err.message}`);
|
||||
});
|
||||
|
||||
// Wait for socket
|
||||
await this.waitForSocket();
|
||||
|
||||
// Configure via API
|
||||
const bootArgs = [
|
||||
"console=ttyS0",
|
||||
"reboot=k",
|
||||
"panic=1",
|
||||
"pci=off",
|
||||
"root=/dev/vda",
|
||||
"rw",
|
||||
`ip=${config.guestIp}::${CONFIG.bridge.gateway}:${CONFIG.bridge.netmask}::eth0:off`,
|
||||
].join(" ");
|
||||
|
||||
await api.putBootSource(config.socketPath, CONFIG.kernelPath, bootArgs);
|
||||
await api.putDrive(config.socketPath, "rootfs", config.rootfsPath);
|
||||
await api.putNetworkInterface(
|
||||
config.socketPath,
|
||||
"eth0",
|
||||
config.tapDevice,
|
||||
macFromOctet(this.octet)
|
||||
);
|
||||
await api.putMachineConfig(config.socketPath, vcpu, mem);
|
||||
await api.startInstance(config.socketPath);
|
||||
}
|
||||
|
||||
private waitForSocket(): Promise<void> {
|
||||
const socketPath = this.config.socketPath;
|
||||
return new Promise((resolve, reject) => {
|
||||
const deadline = Date.now() + 5_000;
|
||||
|
||||
const check = () => {
|
||||
if (existsSync(socketPath)) {
|
||||
setTimeout(resolve, 200);
|
||||
return;
|
||||
}
|
||||
if (Date.now() > deadline) {
|
||||
reject(new Error("Firecracker socket did not appear"));
|
||||
return;
|
||||
}
|
||||
setTimeout(check, 50);
|
||||
};
|
||||
|
||||
check();
|
||||
});
|
||||
}
|
||||
|
||||
async destroy() {
|
||||
const { config } = this;
|
||||
log(config.verbose, `VM ${config.id}: cleaning up...`);
|
||||
|
||||
// Kill firecracker
|
||||
if (this.process && !this.process.killed) {
|
||||
this.process.kill("SIGTERM");
|
||||
await new Promise<void>((resolve) => {
|
||||
const timer = setTimeout(() => {
|
||||
if (this.process && !this.process.killed) {
|
||||
this.process.kill("SIGKILL");
|
||||
}
|
||||
resolve();
|
||||
}, 2_000);
|
||||
this.process!.on("exit", () => {
|
||||
clearTimeout(timer);
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Clean up socket
|
||||
try {
|
||||
const { unlinkSync } = await import("node:fs");
|
||||
unlinkSync(config.socketPath);
|
||||
} catch {
|
||||
// Already gone
|
||||
}
|
||||
|
||||
// Clean up tap device
|
||||
deleteTap(config.tapDevice);
|
||||
|
||||
// Release IP (skip for snapshot runs which don't allocate from pool)
|
||||
if (this.octet > 0) {
|
||||
releaseIp(this.octet);
|
||||
}
|
||||
|
||||
// Delete rootfs copy (skip for snapshot runs which share rootfs)
|
||||
if (config.rootfsPath) {
|
||||
deleteRunCopy(config.rootfsPath);
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user