Compare commits

...

2 Commits

Author SHA1 Message Date
2e5912e73c Add refactoring note to TODO 2026-04-07 16:32:32 +00:00
27cb6508dc Extract shared VM lifecycle helpers into firecracker-vm.ts 2026-04-07 16:32:24 +00:00
5 changed files with 223 additions and 266 deletions

View File

@@ -21,6 +21,8 @@
- [x] Systemd service (KillMode=process) - [x] Systemd service (KillMode=process)
- [x] Regression test suite (20 tests) - [x] Regression test suite (20 tests)
- [ ] Refactor duplicated code — waitForSocket, boot sequence, tap setup, rootfs mount/inject are copy-pasted across vm.ts, snapshot.ts, agent-manager.ts. Extract shared helpers.
## Next up ## Next up
- [ ] Network policies per agent — restrict internet access - [ ] Network policies per agent — restrict internet access

View File

@@ -1,4 +1,3 @@
import { spawn } from "node:child_process";
import { import {
existsSync, existsSync,
mkdirSync, mkdirSync,
@@ -12,18 +11,18 @@ import { join } from "node:path";
import { execFileSync } from "node:child_process"; import { execFileSync } from "node:child_process";
import { CONFIG } from "./config.js"; import { CONFIG } from "./config.js";
import { import {
ensureBridge,
ensureNat,
allocateIp, allocateIp,
releaseIp, releaseIp,
createTap,
deleteTap, deleteTap,
macFromOctet,
applyNetworkPolicy, applyNetworkPolicy,
removeNetworkPolicy, removeNetworkPolicy,
type NetworkPolicy, type NetworkPolicy,
} from "./network.js"; } from "./network.js";
import * as api from "./firecracker-api.js"; import {
setupNetwork,
spawnFirecracker,
bootVM,
} from "./firecracker-vm.js";
export interface AgentInfo { export interface AgentInfo {
name: string; name: string;
@@ -201,24 +200,6 @@ function ensureWorkspace(agentName: string): string {
return imgPath; return imgPath;
} }
function waitForSocket(socketPath: string): Promise<void> {
return new Promise((resolve, reject) => {
const deadline = Date.now() + 5_000;
const check = () => {
if (existsSync(socketPath)) {
setTimeout(resolve, 200);
return;
}
if (Date.now() > deadline) {
reject(new Error("Firecracker socket did not appear"));
return;
}
setTimeout(check, 50);
};
check();
});
}
export async function startAgent( export async function startAgent(
templateName: string, templateName: string,
overrides?: { name?: string; model?: string } overrides?: { name?: string; model?: string }
@@ -266,46 +247,18 @@ export async function startAgent(
const workspacePath = ensureWorkspace(name); const workspacePath = ensureWorkspace(name);
// Setup network // Setup network
ensureBridge(); setupNetwork(tapDevice);
ensureNat();
deleteTap(tapDevice); // clean stale tap from previous run
createTap(tapDevice);
// Boot VM // Boot VM
const proc = spawn( const proc = await spawnFirecracker(socketPath, { detached: true });
CONFIG.firecrackerBin, await bootVM({
["--api-sock", socketPath],
{ stdio: "pipe", detached: true }
);
proc.unref();
await waitForSocket(socketPath);
const bootArgs = [
"console=ttyS0",
"reboot=k",
"panic=1",
"pci=off",
"root=/dev/vda",
"rw",
`ip=${ip}::${CONFIG.bridge.gateway}:${CONFIG.bridge.netmask}::eth0:off`,
].join(" ");
await api.putBootSource(socketPath, CONFIG.kernelPath, bootArgs);
await api.putDrive(socketPath, "rootfs", rootfsPath);
await api.putDrive(socketPath, "workspace", workspacePath, false, false);
await api.putNetworkInterface(
socketPath, socketPath,
"eth0", rootfsPath,
extraDrives: [{ id: "workspace", path: workspacePath }],
tapDevice, tapDevice,
macFromOctet(octet) ip,
); octet,
await api.putMachineConfig( });
socketPath,
CONFIG.vm.vcpuCount,
CONFIG.vm.memSizeMib
);
await api.startInstance(socketPath);
// Apply network policy // Apply network policy
const networkPolicy: NetworkPolicy = template.network ?? "full"; const networkPolicy: NetworkPolicy = template.network ?? "full";

164
src/firecracker-vm.ts Normal file
View File

@@ -0,0 +1,164 @@
/**
* Shared Firecracker VM lifecycle helpers.
* Used by vm.ts, snapshot.ts, and agent-manager.ts.
*/
import { spawn, type ChildProcess } from "node:child_process";
import { existsSync, unlinkSync, mkdirSync } from "node:fs";
import { CONFIG } from "./config.js";
import * as api from "./firecracker-api.js";
import {
ensureBridge,
ensureNat,
createTap,
deleteTap,
macFromOctet,
} from "./network.js";
/**
 * Everything `bootVM` needs to configure and start one Firecracker microVM.
 * Optional fields fall back to values from `CONFIG` inside `bootVM`.
 */
export interface BootOptions {
  /** Path of the Firecracker API socket that all configuration calls target. */
  socketPath: string;
  /** Kernel image to boot; `bootVM` uses `CONFIG.kernelPath` when omitted. */
  kernelPath?: string;
  /** Image attached as the drive with id "rootfs". */
  rootfsPath: string;
  /**
   * Additional drives attached after the rootfs, in array order.
   * `readOnly` is treated as `false` when omitted.
   */
  extraDrives?: Array<{ id: string; path: string; readOnly?: boolean }>;
  /** Host tap device backing the guest's `eth0` interface. */
  tapDevice: string;
  /** Guest IP address, written into the kernel `ip=` boot argument. */
  ip: string;
  /** Value passed to `macFromOctet` to derive the guest MAC address. */
  octet: number;
  /** vCPU count; `bootVM` uses `CONFIG.vm.vcpuCount` when omitted. */
  vcpu?: number;
  /** Memory size passed to the machine config; `CONFIG.vm.memSizeMib` when omitted. */
  mem?: number;
}
/**
 * Poll the filesystem until the Firecracker API socket file exists.
 *
 * The path is checked every 50 ms. Once it exists, the promise resolves
 * after a further 200 ms pause. If the path is still missing after
 * `timeoutMs`, the promise rejects.
 *
 * @param socketPath - Path of the API socket to wait for.
 * @param timeoutMs - Maximum time to keep polling, in milliseconds.
 * @throws Error "Firecracker socket did not appear" when the deadline passes.
 */
export async function waitForSocket(
  socketPath: string,
  timeoutMs = 5_000
): Promise<void> {
  const pause = (ms: number) =>
    new Promise<void>((done) => {
      setTimeout(done, ms);
    });

  const giveUpAt = Date.now() + timeoutMs;

  while (!existsSync(socketPath)) {
    if (Date.now() > giveUpAt) {
      throw new Error("Firecracker socket did not appear");
    }
    await pause(50);
  }

  // The file existing is the only readiness signal used here; pause briefly
  // before handing control back to the caller.
  await pause(200);
}
/**
 * Prepare host networking for one VM.
 *
 * Runs the shared bridge and NAT setup helpers, then recreates the VM's tap
 * device: any tap with the same name is deleted first so a leftover from a
 * previous run does not get in the way of `createTap`.
 *
 * @param tapDevice - Name of the tap device to (re)create for the VM.
 */
export function setupNetwork(tapDevice: string): void {
  // Host-wide prerequisites.
  ensureBridge();
  ensureNat();

  // Per-VM tap: drop a stale one, then create it fresh.
  deleteTap(tapDevice);
  createTap(tapDevice);
}
/**
 * Spawn a Firecracker process and wait for its API socket to appear.
 *
 * Any stale socket file at `socketPath` is removed first and
 * `CONFIG.socketDir` is created if missing. Startup fails fast when the
 * process cannot be spawned or exits before the socket shows up. On any
 * startup failure the child is killed so it is not leaked.
 *
 * @param socketPath - Path passed to Firecracker via `--api-sock`.
 * @param opts - `detached: true` spawns the child detached and unrefs it.
 * @returns The spawned child process once its API socket exists.
 * @throws Error when spawning fails, the process exits early, or the socket
 *   does not appear in time.
 */
export async function spawnFirecracker(
  socketPath: string,
  opts?: { detached?: boolean }
): Promise<ChildProcess> {
  // Clean stale socket; a missing file is the normal case.
  try {
    unlinkSync(socketPath);
  } catch {
    // Nothing to remove.
  }
  mkdirSync(CONFIG.socketDir, { recursive: true });

  const proc = spawn(CONFIG.firecrackerBin, ["--api-sock", socketPath], {
    stdio: "pipe",
    detached: opts?.detached ?? false,
  });
  if (opts?.detached) proc.unref();

  // A ChildProcess with no "error" listener throws on spawn failure (for
  // example a missing binary), which would take down the whole host process.
  // The listener stays attached for the lifetime of the child. After startup
  // the reject() calls below are no-ops because the promise is already
  // consumed by Promise.race.
  const startupFailure = new Promise<never>((_resolve, reject) => {
    proc.on("error", (err) => {
      process.stderr.write(
        `[firecracker-vm] Firecracker process error: ${err.message}\n`
      );
      reject(err);
    });
    proc.once("exit", (code, signal) => {
      reject(
        new Error(
          `Firecracker exited before its API socket appeared (code=${code}, signal=${signal})`
        )
      );
    });
  });

  try {
    await Promise.race([waitForSocket(socketPath), startupFailure]);
  } catch (err) {
    // Do not leave a half-started Firecracker behind.
    if (proc.exitCode === null && proc.signalCode === null) {
      proc.kill("SIGKILL");
    }
    throw err;
  }
  return proc;
}
/**
 * Configure a freshly spawned Firecracker process through its API socket and
 * start the guest.
 *
 * Calls are issued strictly in sequence: boot source, rootfs drive, any extra
 * drives, the `eth0` network interface, machine config, then instance start.
 * Kernel path, vCPU count and memory size fall back to `CONFIG` when not
 * given in `opts`.
 *
 * @param opts - Socket, images, network identity and optional sizing.
 */
export async function bootVM(opts: BootOptions) {
  const { socketPath, rootfsPath, tapDevice, ip, octet } = opts;
  const kernelImage = opts.kernelPath ?? CONFIG.kernelPath;
  const vcpuCount = opts.vcpu ?? CONFIG.vm.vcpuCount;
  const memSizeMib = opts.mem ?? CONFIG.vm.memSizeMib;

  // Static guest network configuration handed to the kernel.
  const netArg = `ip=${ip}::${CONFIG.bridge.gateway}:${CONFIG.bridge.netmask}::eth0:off`;
  const kernelCmdline = `console=ttyS0 reboot=k panic=1 pci=off root=/dev/vda rw ${netArg}`;

  await api.putBootSource(socketPath, kernelImage, kernelCmdline);
  await api.putDrive(socketPath, "rootfs", rootfsPath);

  // Extra drives are attached one at a time, in the order supplied.
  for (const { id, path, readOnly } of opts.extraDrives ?? []) {
    await api.putDrive(socketPath, id, path, readOnly ?? false, false);
  }

  await api.putNetworkInterface(
    socketPath,
    "eth0",
    tapDevice,
    macFromOctet(octet)
  );
  await api.putMachineConfig(socketPath, vcpuCount, memSizeMib);
  await api.startInstance(socketPath);
}
/**
 * Stop a Firecracker process and remove its API socket file.
 *
 * Sends `signal` and waits up to `graceMs` for the process to exit. If it is
 * still running after that, it is sent SIGKILL. A `null` or already-exited
 * process is skipped, so only the socket cleanup runs.
 *
 * Liveness is judged by `exitCode`/`signalCode`, not `proc.killed`.
 * `killed` only records that a signal was sent, so it is already true after
 * the first `kill()` and says nothing about whether the process is gone.
 *
 * @param proc - Child process to stop, or `null` if none was started.
 * @param socketPath - API socket file to delete afterwards (best effort).
 * @param signal - First signal to send; defaults to SIGTERM.
 * @param graceMs - How long to wait for exit before sending SIGKILL.
 */
export async function killFirecracker(
  proc: ChildProcess | null,
  socketPath: string,
  signal: NodeJS.Signals = "SIGTERM",
  graceMs = 2_000
): Promise<void> {
  const isRunning = (p: ChildProcess) =>
    p.exitCode === null && p.signalCode === null;

  const child = proc;
  if (child !== null && isRunning(child)) {
    await new Promise<void>((resolve) => {
      const onExit = () => {
        clearTimeout(timer);
        resolve();
      };
      const timer = setTimeout(() => {
        child.off("exit", onExit);
        // Grace period is over: escalate if the process ignored `signal`.
        if (isRunning(child)) {
          child.kill("SIGKILL");
        }
        resolve();
      }, graceMs);

      child.once("exit", onExit);

      // kill() returns false when nothing could be signalled (for example the
      // process never spawned), so no "exit" event will follow.
      if (!child.kill(signal)) {
        child.off("exit", onExit);
        clearTimeout(timer);
        resolve();
      }
    });
  }

  try {
    unlinkSync(socketPath);
  } catch {
    // Socket already gone.
  }
}

View File

@@ -1,45 +1,22 @@
import { spawn, type ChildProcess } from "node:child_process"; import { type ChildProcess } from "node:child_process";
import { existsSync, mkdirSync } from "node:fs"; import { existsSync, mkdirSync, copyFileSync } from "node:fs";
import { join } from "node:path"; import { join } from "node:path";
import { CONFIG } from "./config.js"; import { CONFIG } from "./config.js";
import * as api from "./firecracker-api.js"; import * as api from "./firecracker-api.js";
import { import { deleteTap } from "./network.js";
ensureBridge, import { ensureBaseImage, ensureSshKeypair, injectSshKey } from "./rootfs.js";
ensureNat,
createTap,
deleteTap,
macFromOctet,
} from "./network.js";
import {
ensureBaseImage,
ensureSshKeypair,
injectSshKey,
} from "./rootfs.js";
import { waitForSsh } from "./ssh.js"; import { waitForSsh } from "./ssh.js";
import { copyFileSync } from "node:fs"; import {
setupNetwork,
spawnFirecracker,
bootVM,
killFirecracker,
} from "./firecracker-vm.js";
function log(msg: string) { function log(msg: string) {
process.stderr.write(`[snapshot] ${msg}\n`); process.stderr.write(`[snapshot] ${msg}\n`);
} }
function waitForSocket(socketPath: string): Promise<void> {
return new Promise((resolve, reject) => {
const deadline = Date.now() + 5_000;
const check = () => {
if (existsSync(socketPath)) {
setTimeout(resolve, 200);
return;
}
if (Date.now() > deadline) {
reject(new Error("Firecracker socket did not appear"));
return;
}
setTimeout(check, 50);
};
check();
});
}
export function snapshotExists(): boolean { export function snapshotExists(): boolean {
return ( return (
existsSync(CONFIG.snapshot.statePath) && existsSync(CONFIG.snapshot.statePath) &&
@@ -61,47 +38,21 @@ export async function createSnapshot() {
injectSshKey(snap.rootfsPath); injectSshKey(snap.rootfsPath);
log("Setting up network..."); log("Setting up network...");
ensureBridge(); setupNetwork(snap.tapDevice);
ensureNat();
deleteTap(snap.tapDevice); // clean stale tap from previous run
createTap(snap.tapDevice);
let proc: ChildProcess | null = null; let proc: ChildProcess | null = null;
try { try {
log("Booting VM for snapshot..."); log("Booting VM for snapshot...");
proc = spawn(CONFIG.firecrackerBin, ["--api-sock", socketPath], { proc = await spawnFirecracker(socketPath);
stdio: "pipe", await bootVM({
detached: false, socketPath,
rootfsPath: snap.rootfsPath,
tapDevice: snap.tapDevice,
ip: snap.ip,
octet: snap.octet,
}); });
await waitForSocket(socketPath);
const bootArgs = [
"console=ttyS0",
"reboot=k",
"panic=1",
"pci=off",
"root=/dev/vda",
"rw",
`ip=${snap.ip}::${CONFIG.bridge.gateway}:${CONFIG.bridge.netmask}::eth0:off`,
].join(" ");
await api.putBootSource(socketPath, CONFIG.kernelPath, bootArgs);
await api.putDrive(socketPath, "rootfs", snap.rootfsPath);
await api.putNetworkInterface(
socketPath,
"eth0",
snap.tapDevice,
macFromOctet(snap.octet)
);
await api.putMachineConfig(
socketPath,
CONFIG.vm.vcpuCount,
CONFIG.vm.memSizeMib
);
await api.startInstance(socketPath);
log("Waiting for SSH..."); log("Waiting for SSH...");
await waitForSsh(snap.ip); await waitForSsh(snap.ip);
@@ -116,13 +67,7 @@ export async function createSnapshot() {
log(` Memory: ${snap.memPath}`); log(` Memory: ${snap.memPath}`);
log(` Rootfs: ${snap.rootfsPath}`); log(` Rootfs: ${snap.rootfsPath}`);
} finally { } finally {
if (proc && !proc.killed) { await killFirecracker(proc, socketPath, "SIGKILL");
proc.kill("SIGKILL");
}
try {
const { unlinkSync } = await import("node:fs");
unlinkSync(socketPath);
} catch {}
deleteTap(snap.tapDevice); deleteTap(snap.tapDevice);
} }
} }

157
src/vm.ts
View File

@@ -1,19 +1,11 @@
import { spawn, type ChildProcess } from "node:child_process"; import { type ChildProcess } from "node:child_process";
import { existsSync, mkdirSync } from "node:fs"; import { mkdirSync } from "node:fs";
import { join } from "node:path"; import { join } from "node:path";
import { randomBytes } from "node:crypto"; import { randomBytes } from "node:crypto";
import { CONFIG } from "./config.js"; import { CONFIG } from "./config.js";
import type { VMConfig, RunResult, RunOptions } from "./types.js"; import type { VMConfig, RunResult, RunOptions } from "./types.js";
import * as api from "./firecracker-api.js"; import * as api from "./firecracker-api.js";
import { import { allocateIp, releaseIp, deleteTap } from "./network.js";
ensureBridge,
ensureNat,
allocateIp,
releaseIp,
createTap,
deleteTap,
macFromOctet,
} from "./network.js";
import { import {
ensureBaseImage, ensureBaseImage,
ensureSshKeypair, ensureSshKeypair,
@@ -24,6 +16,12 @@ import {
import { waitForSsh, execCommand } from "./ssh.js"; import { waitForSsh, execCommand } from "./ssh.js";
import { registerVm, unregisterVm } from "./cleanup.js"; import { registerVm, unregisterVm } from "./cleanup.js";
import { snapshotExists } from "./snapshot.js"; import { snapshotExists } from "./snapshot.js";
import {
setupNetwork,
spawnFirecracker,
bootVM,
killFirecracker,
} from "./firecracker-vm.js";
function log(verbose: boolean, msg: string) { function log(verbose: boolean, msg: string) {
if (verbose) process.stderr.write(`[fireclaw] ${msg}\n`); if (verbose) process.stderr.write(`[fireclaw] ${msg}\n`);
@@ -42,7 +40,6 @@ export class VMInstance {
command: string, command: string,
opts: RunOptions = {} opts: RunOptions = {}
): Promise<RunResult> { ): Promise<RunResult> {
// Try snapshot path first unless disabled
if (!opts.noSnapshot && snapshotExists()) { if (!opts.noSnapshot && snapshotExists()) {
return VMInstance.runFromSnapshot(command, opts); return VMInstance.runFromSnapshot(command, opts);
} }
@@ -65,33 +62,20 @@ export class VMInstance {
guestIp: snap.ip, guestIp: snap.ip,
tapDevice: snap.tapDevice, tapDevice: snap.tapDevice,
socketPath: join(CONFIG.socketDir, `${id}.sock`), socketPath: join(CONFIG.socketDir, `${id}.sock`),
rootfsPath: "", // shared, not per-run rootfsPath: "",
timeoutMs, timeoutMs,
verbose, verbose,
}; };
const vm = new VMInstance(config); const vm = new VMInstance(config);
vm.octet = 0; // no IP pool allocation for snapshot runs vm.octet = 0;
registerVm(vm); registerVm(vm);
try { try {
log(verbose, `VM ${id}: restoring from snapshot...`); log(verbose, `VM ${id}: restoring from snapshot...`);
ensureBridge(); setupNetwork(snap.tapDevice);
ensureNat();
deleteTap(snap.tapDevice); // clean stale tap from previous run
createTap(snap.tapDevice);
// Spawn firecracker and load snapshot vm.process = await spawnFirecracker(config.socketPath);
vm.process = spawn(
CONFIG.firecrackerBin,
["--api-sock", config.socketPath],
{ stdio: "pipe", detached: false }
);
vm.process.on("error", (err) => {
log(verbose, `Firecracker process error: ${err.message}`);
});
await vm.waitForSocket();
await api.putSnapshotLoad( await api.putSnapshotLoad(
config.socketPath, config.socketPath,
snap.statePath, snap.statePath,
@@ -124,16 +108,12 @@ export class VMInstance {
const verbose = opts.verbose ?? false; const verbose = opts.verbose ?? false;
const timeoutMs = opts.timeout ?? CONFIG.vm.defaultTimeoutMs; const timeoutMs = opts.timeout ?? CONFIG.vm.defaultTimeoutMs;
// Pre-flight checks
ensureBaseImage(); ensureBaseImage();
ensureSshKeypair(); ensureSshKeypair();
// Allocate resources
const { ip, octet } = allocateIp(); const { ip, octet } = allocateIp();
const tapDevice = `fctap${octet}`; const tapDevice = `fctap${octet}`;
mkdirSync(CONFIG.socketDir, { recursive: true });
const config: VMConfig = { const config: VMConfig = {
id, id,
guestIp: ip, guestIp: ip,
@@ -154,13 +134,19 @@ export class VMInstance {
injectSshKey(config.rootfsPath); injectSshKey(config.rootfsPath);
log(verbose, `VM ${id}: creating tap ${tapDevice}...`); log(verbose, `VM ${id}: creating tap ${tapDevice}...`);
ensureBridge(); setupNetwork(tapDevice);
ensureNat();
deleteTap(tapDevice); // clean stale tap from previous run
createTap(tapDevice);
log(verbose, `VM ${id}: booting...`); log(verbose, `VM ${id}: booting...`);
await vm.boot(opts); vm.process = await spawnFirecracker(config.socketPath);
await bootVM({
socketPath: config.socketPath,
rootfsPath: config.rootfsPath,
tapDevice,
ip,
octet,
vcpu: opts.vcpu,
mem: opts.mem,
});
log(verbose, `VM ${id}: waiting for SSH at ${ip}...`); log(verbose, `VM ${id}: waiting for SSH at ${ip}...`);
await waitForSsh(ip); await waitForSsh(ip);
@@ -179,110 +165,17 @@ export class VMInstance {
} }
} }
private async boot(opts: RunOptions) {
const { config } = this;
const vcpu = opts.vcpu ?? CONFIG.vm.vcpuCount;
const mem = opts.mem ?? CONFIG.vm.memSizeMib;
// Spawn firecracker
this.process = spawn(
CONFIG.firecrackerBin,
["--api-sock", config.socketPath],
{
stdio: "pipe",
detached: false,
}
);
this.process.on("error", (err) => {
log(config.verbose, `Firecracker process error: ${err.message}`);
});
// Wait for socket
await this.waitForSocket();
// Configure via API
const bootArgs = [
"console=ttyS0",
"reboot=k",
"panic=1",
"pci=off",
"root=/dev/vda",
"rw",
`ip=${config.guestIp}::${CONFIG.bridge.gateway}:${CONFIG.bridge.netmask}::eth0:off`,
].join(" ");
await api.putBootSource(config.socketPath, CONFIG.kernelPath, bootArgs);
await api.putDrive(config.socketPath, "rootfs", config.rootfsPath);
await api.putNetworkInterface(
config.socketPath,
"eth0",
config.tapDevice,
macFromOctet(this.octet)
);
await api.putMachineConfig(config.socketPath, vcpu, mem);
await api.startInstance(config.socketPath);
}
private waitForSocket(): Promise<void> {
const socketPath = this.config.socketPath;
return new Promise((resolve, reject) => {
const deadline = Date.now() + 5_000;
const check = () => {
if (existsSync(socketPath)) {
setTimeout(resolve, 200);
return;
}
if (Date.now() > deadline) {
reject(new Error("Firecracker socket did not appear"));
return;
}
setTimeout(check, 50);
};
check();
});
}
async destroy() { async destroy() {
const { config } = this; const { config } = this;
log(config.verbose, `VM ${config.id}: cleaning up...`); log(config.verbose, `VM ${config.id}: cleaning up...`);
// Kill firecracker await killFirecracker(this.process, config.socketPath);
if (this.process && !this.process.killed) {
this.process.kill("SIGTERM");
await new Promise<void>((resolve) => {
const timer = setTimeout(() => {
if (this.process && !this.process.killed) {
this.process.kill("SIGKILL");
}
resolve();
}, 2_000);
this.process!.on("exit", () => {
clearTimeout(timer);
resolve();
});
});
}
// Clean up socket
try {
const { unlinkSync } = await import("node:fs");
unlinkSync(config.socketPath);
} catch {
// Already gone
}
// Clean up tap device
deleteTap(config.tapDevice); deleteTap(config.tapDevice);
// Release IP (skip for snapshot runs which don't allocate from pool)
if (this.octet > 0) { if (this.octet > 0) {
releaseIp(this.octet); releaseIp(this.octet);
} }
// Delete rootfs copy (skip for snapshot runs which share rootfs)
if (config.rootfsPath) { if (config.rootfsPath) {
deleteRunCopy(config.rootfsPath); deleteRunCopy(config.rootfsPath);
} }