Extract shared VM lifecycle helpers into firecracker-vm.ts

This commit is contained in:
2026-04-07 16:32:24 +00:00
parent a2cef20a89
commit 27cb6508dc
4 changed files with 221 additions and 266 deletions

View File

@@ -1,4 +1,3 @@
import { spawn } from "node:child_process";
import {
existsSync,
mkdirSync,
@@ -12,18 +11,18 @@ import { join } from "node:path";
import { execFileSync } from "node:child_process";
import { CONFIG } from "./config.js";
import {
ensureBridge,
ensureNat,
allocateIp,
releaseIp,
createTap,
deleteTap,
macFromOctet,
applyNetworkPolicy,
removeNetworkPolicy,
type NetworkPolicy,
} from "./network.js";
import * as api from "./firecracker-api.js";
import {
setupNetwork,
spawnFirecracker,
bootVM,
} from "./firecracker-vm.js";
export interface AgentInfo {
name: string;
@@ -201,24 +200,6 @@ function ensureWorkspace(agentName: string): string {
return imgPath;
}
/**
 * Poll the filesystem until the Firecracker API socket shows up.
 *
 * The path is checked every 50 ms for up to 5 seconds. Once it exists the
 * promise settles after a further 200 ms pause; if the deadline passes first
 * the promise rejects.
 *
 * @param socketPath Path of the API socket to wait for.
 * @throws Error "Firecracker socket did not appear" when the deadline passes.
 */
async function waitForSocket(socketPath: string): Promise<void> {
  const pause = (ms: number) =>
    new Promise<void>((done) => setTimeout(done, ms));
  const giveUpAt = Date.now() + 5_000;

  while (!existsSync(socketPath)) {
    if (Date.now() > giveUpAt) {
      throw new Error("Firecracker socket did not appear");
    }
    await pause(50);
  }

  // Same post-detection pause as before resolving.
  await pause(200);
}
export async function startAgent(
templateName: string,
overrides?: { name?: string; model?: string }
@@ -266,46 +247,18 @@ export async function startAgent(
const workspacePath = ensureWorkspace(name);
// Setup network
ensureBridge();
ensureNat();
deleteTap(tapDevice); // clean stale tap from previous run
createTap(tapDevice);
setupNetwork(tapDevice);
// Boot VM
const proc = spawn(
CONFIG.firecrackerBin,
["--api-sock", socketPath],
{ stdio: "pipe", detached: true }
);
proc.unref();
await waitForSocket(socketPath);
const bootArgs = [
"console=ttyS0",
"reboot=k",
"panic=1",
"pci=off",
"root=/dev/vda",
"rw",
`ip=${ip}::${CONFIG.bridge.gateway}:${CONFIG.bridge.netmask}::eth0:off`,
].join(" ");
await api.putBootSource(socketPath, CONFIG.kernelPath, bootArgs);
await api.putDrive(socketPath, "rootfs", rootfsPath);
await api.putDrive(socketPath, "workspace", workspacePath, false, false);
await api.putNetworkInterface(
const proc = await spawnFirecracker(socketPath, { detached: true });
await bootVM({
socketPath,
"eth0",
rootfsPath,
extraDrives: [{ id: "workspace", path: workspacePath }],
tapDevice,
macFromOctet(octet)
);
await api.putMachineConfig(
socketPath,
CONFIG.vm.vcpuCount,
CONFIG.vm.memSizeMib
);
await api.startInstance(socketPath);
ip,
octet,
});
// Apply network policy
const networkPolicy: NetworkPolicy = template.network ?? "full";

164
src/firecracker-vm.ts Normal file
View File

@@ -0,0 +1,164 @@
/**
* Shared Firecracker VM lifecycle helpers.
* Used by vm.ts, snapshot.ts, and agent-manager.ts.
*/
import { spawn, type ChildProcess } from "node:child_process";
import { existsSync, unlinkSync, mkdirSync } from "node:fs";
import { CONFIG } from "./config.js";
import * as api from "./firecracker-api.js";
import {
ensureBridge,
ensureNat,
createTap,
deleteTap,
macFromOctet,
} from "./network.js";
/**
 * Options accepted by `bootVM` when configuring and starting a VM.
 */
export interface BootOptions {
// Path of the Firecracker API socket that every configuration call is sent to.
socketPath: string;
// Kernel image path; `bootVM` falls back to CONFIG.kernelPath when omitted.
kernelPath?: string;
// Image attached as the drive with id "rootfs".
rootfsPath: string;
// Additional drives attached after the rootfs, in array order.
// `readOnly` is treated as false when omitted.
extraDrives?: { id: string; path: string; readOnly?: boolean }[];
// Host tap device handed to the "eth0" network interface.
tapDevice: string;
// Guest address placed in the kernel `ip=` boot argument.
ip: string;
// Value passed to macFromOctet() to produce the guest MAC address.
octet: number;
// vCPU count; `bootVM` falls back to CONFIG.vm.vcpuCount when omitted.
vcpu?: number;
// Memory size; `bootVM` falls back to CONFIG.vm.memSizeMib when omitted.
mem?: number;
}
/**
 * Poll until a Firecracker API socket path exists on disk.
 *
 * The path is checked immediately and then every 50 ms. Once it exists the
 * returned promise settles after an additional 200 ms pause. If the path is
 * still missing once `timeoutMs` has elapsed, the promise rejects.
 *
 * @param socketPath Path of the API socket to wait for.
 * @param timeoutMs  How long to keep polling, in milliseconds (default 5000).
 * @throws Error "Firecracker socket did not appear" when the deadline passes.
 */
export async function waitForSocket(
  socketPath: string,
  timeoutMs = 5_000
): Promise<void> {
  const sleep = (ms: number) =>
    new Promise<void>((wake) => setTimeout(wake, ms));
  const expiresAt = Date.now() + timeoutMs;

  for (;;) {
    if (existsSync(socketPath)) {
      // Post-detection pause, unchanged from the polling version.
      await sleep(200);
      return;
    }
    if (Date.now() > expiresAt) {
      throw new Error("Firecracker socket did not appear");
    }
    await sleep(50);
  }
}
/**
 * Prepare networking for one VM.
 *
 * Ensures the bridge and NAT are in place, then recreates the tap device:
 * any tap left under the same name is deleted before the new one is created.
 *
 * @param tapDevice Name of the tap device to (re)create.
 */
export function setupNetwork(tapDevice: string): void {
  // Bridge and NAT are ensured before any tap work.
  ensureBridge();
  ensureNat();

  // Drop a stale tap of the same name, then create it fresh.
  deleteTap(tapDevice);
  createTap(tapDevice);
}
/**
 * Spawn a Firecracker process and wait for its API socket to appear.
 *
 * Any stale socket file at `socketPath` is removed first and CONFIG.socketDir
 * is created if missing. The returned promise rejects, instead of crashing the
 * host process or waiting out the full socket timeout, when:
 *  - the binary cannot be spawned (child `'error'` event),
 *  - the child exits before the socket appears, or
 *  - the socket does not appear in time.
 * On any of these failures a still-running child is killed so it is not leaked.
 *
 * @param socketPath Path passed to Firecracker via `--api-sock`.
 * @param opts.detached Spawn detached and `unref()` the child (default false).
 * @returns The running child process once its API socket exists.
 * @throws Error describing the spawn failure, early exit, or socket timeout.
 */
export async function spawnFirecracker(
  socketPath: string,
  opts?: { detached?: boolean }
): Promise<ChildProcess> {
  // Best-effort removal of a stale socket; a missing file is the normal case.
  try {
    unlinkSync(socketPath);
  } catch {}

  mkdirSync(CONFIG.socketDir, { recursive: true });

  const detached = opts?.detached ?? false;
  const proc = spawn(CONFIG.firecrackerBin, ["--api-sock", socketPath], {
    stdio: "pipe",
    detached,
  });
  if (detached) proc.unref();

  const startupFailure = new Promise<never>((_resolve, reject) => {
    // A ChildProcess 'error' event with no listener is thrown as an uncaught
    // exception. The listener is left attached for the lifetime of the child
    // so later 'error' events (e.g. a failed kill) cannot crash the host;
    // rejecting an already-settled promise is a no-op.
    proc.on("error", (err) => {
      reject(new Error(`Firecracker process error: ${err.message}`));
    });
    proc.once("exit", (code, signal) => {
      reject(
        new Error(
          `Firecracker exited before its API socket appeared (code=${code}, signal=${signal})`
        )
      );
    });
  });

  try {
    await Promise.race([waitForSocket(socketPath), startupFailure]);
  } catch (err) {
    // Do not leak a child that is still alive but never became usable.
    // kill() simply returns false when the spawn itself failed.
    if (proc.exitCode === null && proc.signalCode === null) {
      proc.kill("SIGKILL");
    }
    throw err;
  }

  return proc;
}
/**
 * Push a complete VM configuration to a Firecracker API socket and start the
 * instance.
 *
 * Calls are issued strictly in this order: boot source, rootfs drive, extra
 * drives (array order), network interface "eth0", machine config, start.
 * Kernel path, vCPU count and memory size fall back to CONFIG when the
 * corresponding option is omitted.
 *
 * @param opts Boot parameters; see `BootOptions`.
 */
export async function bootVM(opts: BootOptions) {
  const { socketPath, rootfsPath, tapDevice, ip, octet } = opts;
  const kernelImage = opts.kernelPath ?? CONFIG.kernelPath;
  const vcpuCount = opts.vcpu ?? CONFIG.vm.vcpuCount;
  const memMib = opts.mem ?? CONFIG.vm.memSizeMib;

  // Kernel command line; the trailing ip= argument carries the guest address,
  // bridge gateway and netmask for eth0.
  const kernelCmdline =
    "console=ttyS0 reboot=k panic=1 pci=off root=/dev/vda rw " +
    `ip=${ip}::${CONFIG.bridge.gateway}:${CONFIG.bridge.netmask}::eth0:off`;

  await api.putBootSource(socketPath, kernelImage, kernelCmdline);
  await api.putDrive(socketPath, "rootfs", rootfsPath);

  // NOTE(review): readOnly is forwarded as the 4th putDrive argument and the
  // 5th is always false; putDrive's parameter order is not visible here —
  // confirm it matches.
  for (const { id, path, readOnly } of opts.extraDrives ?? []) {
    await api.putDrive(socketPath, id, path, readOnly ?? false, false);
  }

  await api.putNetworkInterface(
    socketPath,
    "eth0",
    tapDevice,
    macFromOctet(octet)
  );
  await api.putMachineConfig(socketPath, vcpuCount, memMib);
  await api.startInstance(socketPath);
}
/**
 * Stop a Firecracker process and remove its API socket file.
 *
 * Sends `signal` and waits up to 2 seconds for the process to exit; if it is
 * still running after that, SIGKILL is sent. Liveness is judged from
 * `exitCode` / `signalCode` rather than `proc.killed`, because `killed` only
 * records that a signal was sent, not that the process has terminated.
 * Nothing is waited for when the process has already exited or when the
 * signal could not be delivered.
 *
 * @param proc       Child process to stop, or null when none was started.
 * @param socketPath API socket file to remove (best effort).
 * @param signal     First signal to send (default "SIGTERM").
 */
export async function killFirecracker(
  proc: ChildProcess | null,
  socketPath: string,
  signal: NodeJS.Signals = "SIGTERM"
): Promise<void> {
  const stillRunning = (): boolean =>
    proc !== null && proc.exitCode === null && proc.signalCode === null;

  // kill() returns false when the signal could not be delivered (e.g. the
  // spawn never succeeded); no 'exit' event would follow, so skip the wait.
  if (proc && stillRunning() && proc.kill(signal)) {
    await new Promise<void>((resolve) => {
      const onExit = () => {
        clearTimeout(timer);
        resolve();
      };
      const timer = setTimeout(() => {
        proc.removeListener("exit", onExit);
        // Escalate only if the first signal did not end the process.
        if (stillRunning()) {
          proc.kill("SIGKILL");
        }
        resolve();
      }, 2_000);
      proc.once("exit", onExit);
    });
  }

  // Best-effort socket cleanup; the file may already be gone.
  try {
    unlinkSync(socketPath);
  } catch {}
}

View File

@@ -1,45 +1,22 @@
import { spawn, type ChildProcess } from "node:child_process";
import { existsSync, mkdirSync } from "node:fs";
import { type ChildProcess } from "node:child_process";
import { existsSync, mkdirSync, copyFileSync } from "node:fs";
import { join } from "node:path";
import { CONFIG } from "./config.js";
import * as api from "./firecracker-api.js";
import {
ensureBridge,
ensureNat,
createTap,
deleteTap,
macFromOctet,
} from "./network.js";
import {
ensureBaseImage,
ensureSshKeypair,
injectSshKey,
} from "./rootfs.js";
import { deleteTap } from "./network.js";
import { ensureBaseImage, ensureSshKeypair, injectSshKey } from "./rootfs.js";
import { waitForSsh } from "./ssh.js";
import { copyFileSync } from "node:fs";
import {
setupNetwork,
spawnFirecracker,
bootVM,
killFirecracker,
} from "./firecracker-vm.js";
/**
 * Write one "[snapshot]"-prefixed, newline-terminated line to stderr.
 *
 * @param msg Message text to emit.
 */
function log(msg: string): void {
  const line = "[snapshot] " + msg + "\n";
  process.stderr.write(line);
}
/**
 * Wait for the Firecracker API socket file to exist.
 *
 * Checks every 50 ms for up to 5 seconds; after the path is first seen the
 * promise settles following a 200 ms pause. Rejects if the deadline passes
 * while the path is still missing.
 *
 * @param socketPath Path of the API socket to wait for.
 * @throws Error "Firecracker socket did not appear" when the deadline passes.
 */
async function waitForSocket(socketPath: string): Promise<void> {
  const delay = (ms: number) =>
    new Promise<void>((next) => setTimeout(next, ms));
  const cutoff = Date.now() + 5_000;

  while (!existsSync(socketPath)) {
    if (Date.now() > cutoff) {
      throw new Error("Firecracker socket did not appear");
    }
    await delay(50);
  }

  await delay(200);
}
export function snapshotExists(): boolean {
return (
existsSync(CONFIG.snapshot.statePath) &&
@@ -61,47 +38,21 @@ export async function createSnapshot() {
injectSshKey(snap.rootfsPath);
log("Setting up network...");
ensureBridge();
ensureNat();
deleteTap(snap.tapDevice); // clean stale tap from previous run
createTap(snap.tapDevice);
setupNetwork(snap.tapDevice);
let proc: ChildProcess | null = null;
try {
log("Booting VM for snapshot...");
proc = spawn(CONFIG.firecrackerBin, ["--api-sock", socketPath], {
stdio: "pipe",
detached: false,
proc = await spawnFirecracker(socketPath);
await bootVM({
socketPath,
rootfsPath: snap.rootfsPath,
tapDevice: snap.tapDevice,
ip: snap.ip,
octet: snap.octet,
});
await waitForSocket(socketPath);
const bootArgs = [
"console=ttyS0",
"reboot=k",
"panic=1",
"pci=off",
"root=/dev/vda",
"rw",
`ip=${snap.ip}::${CONFIG.bridge.gateway}:${CONFIG.bridge.netmask}::eth0:off`,
].join(" ");
await api.putBootSource(socketPath, CONFIG.kernelPath, bootArgs);
await api.putDrive(socketPath, "rootfs", snap.rootfsPath);
await api.putNetworkInterface(
socketPath,
"eth0",
snap.tapDevice,
macFromOctet(snap.octet)
);
await api.putMachineConfig(
socketPath,
CONFIG.vm.vcpuCount,
CONFIG.vm.memSizeMib
);
await api.startInstance(socketPath);
log("Waiting for SSH...");
await waitForSsh(snap.ip);
@@ -116,13 +67,7 @@ export async function createSnapshot() {
log(` Memory: ${snap.memPath}`);
log(` Rootfs: ${snap.rootfsPath}`);
} finally {
if (proc && !proc.killed) {
proc.kill("SIGKILL");
}
try {
const { unlinkSync } = await import("node:fs");
unlinkSync(socketPath);
} catch {}
await killFirecracker(proc, socketPath, "SIGKILL");
deleteTap(snap.tapDevice);
}
}

157
src/vm.ts
View File

@@ -1,19 +1,11 @@
import { spawn, type ChildProcess } from "node:child_process";
import { existsSync, mkdirSync } from "node:fs";
import { type ChildProcess } from "node:child_process";
import { mkdirSync } from "node:fs";
import { join } from "node:path";
import { randomBytes } from "node:crypto";
import { CONFIG } from "./config.js";
import type { VMConfig, RunResult, RunOptions } from "./types.js";
import * as api from "./firecracker-api.js";
import {
ensureBridge,
ensureNat,
allocateIp,
releaseIp,
createTap,
deleteTap,
macFromOctet,
} from "./network.js";
import { allocateIp, releaseIp, deleteTap } from "./network.js";
import {
ensureBaseImage,
ensureSshKeypair,
@@ -24,6 +16,12 @@ import {
import { waitForSsh, execCommand } from "./ssh.js";
import { registerVm, unregisterVm } from "./cleanup.js";
import { snapshotExists } from "./snapshot.js";
import {
setupNetwork,
spawnFirecracker,
bootVM,
killFirecracker,
} from "./firecracker-vm.js";
function log(verbose: boolean, msg: string) {
if (verbose) process.stderr.write(`[fireclaw] ${msg}\n`);
@@ -42,7 +40,6 @@ export class VMInstance {
command: string,
opts: RunOptions = {}
): Promise<RunResult> {
// Try snapshot path first unless disabled
if (!opts.noSnapshot && snapshotExists()) {
return VMInstance.runFromSnapshot(command, opts);
}
@@ -65,33 +62,20 @@ export class VMInstance {
guestIp: snap.ip,
tapDevice: snap.tapDevice,
socketPath: join(CONFIG.socketDir, `${id}.sock`),
rootfsPath: "", // shared, not per-run
rootfsPath: "",
timeoutMs,
verbose,
};
const vm = new VMInstance(config);
vm.octet = 0; // no IP pool allocation for snapshot runs
vm.octet = 0;
registerVm(vm);
try {
log(verbose, `VM ${id}: restoring from snapshot...`);
ensureBridge();
ensureNat();
deleteTap(snap.tapDevice); // clean stale tap from previous run
createTap(snap.tapDevice);
setupNetwork(snap.tapDevice);
// Spawn firecracker and load snapshot
vm.process = spawn(
CONFIG.firecrackerBin,
["--api-sock", config.socketPath],
{ stdio: "pipe", detached: false }
);
vm.process.on("error", (err) => {
log(verbose, `Firecracker process error: ${err.message}`);
});
await vm.waitForSocket();
vm.process = await spawnFirecracker(config.socketPath);
await api.putSnapshotLoad(
config.socketPath,
snap.statePath,
@@ -124,16 +108,12 @@ export class VMInstance {
const verbose = opts.verbose ?? false;
const timeoutMs = opts.timeout ?? CONFIG.vm.defaultTimeoutMs;
// Pre-flight checks
ensureBaseImage();
ensureSshKeypair();
// Allocate resources
const { ip, octet } = allocateIp();
const tapDevice = `fctap${octet}`;
mkdirSync(CONFIG.socketDir, { recursive: true });
const config: VMConfig = {
id,
guestIp: ip,
@@ -154,13 +134,19 @@ export class VMInstance {
injectSshKey(config.rootfsPath);
log(verbose, `VM ${id}: creating tap ${tapDevice}...`);
ensureBridge();
ensureNat();
deleteTap(tapDevice); // clean stale tap from previous run
createTap(tapDevice);
setupNetwork(tapDevice);
log(verbose, `VM ${id}: booting...`);
await vm.boot(opts);
vm.process = await spawnFirecracker(config.socketPath);
await bootVM({
socketPath: config.socketPath,
rootfsPath: config.rootfsPath,
tapDevice,
ip,
octet,
vcpu: opts.vcpu,
mem: opts.mem,
});
log(verbose, `VM ${id}: waiting for SSH at ${ip}...`);
await waitForSsh(ip);
@@ -179,110 +165,17 @@ export class VMInstance {
}
}
/**
 * Spawn Firecracker for this instance, configure it over the API socket and
 * start the guest.
 *
 * vCPU count and memory size come from `opts`, falling back to CONFIG.vm.
 * API calls are issued in order: boot source, rootfs drive, network
 * interface "eth0", machine config, start.
 */
private async boot(opts: RunOptions) {
  const cfg = this.config;
  const vcpuCount = opts.vcpu ?? CONFIG.vm.vcpuCount;
  const memMib = opts.mem ?? CONFIG.vm.memSizeMib;

  // Start the Firecracker process attached to this instance's API socket.
  const child = spawn(CONFIG.firecrackerBin, ["--api-sock", cfg.socketPath], {
    stdio: "pipe",
    detached: false,
  });
  this.process = child;
  child.on("error", (err) => {
    log(cfg.verbose, `Firecracker process error: ${err.message}`);
  });

  await this.waitForSocket();

  // Kernel command line; the ip= argument carries guest address, gateway and
  // netmask for eth0.
  const kernelCmdline =
    "console=ttyS0 reboot=k panic=1 pci=off root=/dev/vda rw " +
    `ip=${cfg.guestIp}::${CONFIG.bridge.gateway}:${CONFIG.bridge.netmask}::eth0:off`;

  await api.putBootSource(cfg.socketPath, CONFIG.kernelPath, kernelCmdline);
  await api.putDrive(cfg.socketPath, "rootfs", cfg.rootfsPath);
  await api.putNetworkInterface(
    cfg.socketPath,
    "eth0",
    cfg.tapDevice,
    macFromOctet(this.octet)
  );
  await api.putMachineConfig(cfg.socketPath, vcpuCount, memMib);
  await api.startInstance(cfg.socketPath);
}
/**
 * Wait for this instance's Firecracker API socket to exist.
 *
 * Polls `this.config.socketPath` every 50 ms for up to 5 seconds, then
 * pauses 200 ms before settling. Rejects with
 * "Firecracker socket did not appear" if the deadline passes first.
 */
private async waitForSocket(): Promise<void> {
  const { socketPath } = this.config;
  const nap = (ms: number) =>
    new Promise<void>((wake) => setTimeout(wake, ms));
  const limit = Date.now() + 5_000;

  while (!existsSync(socketPath)) {
    if (Date.now() > limit) {
      throw new Error("Firecracker socket did not appear");
    }
    await nap(50);
  }

  await nap(200);
}
async destroy() {
const { config } = this;
log(config.verbose, `VM ${config.id}: cleaning up...`);
// Kill firecracker
if (this.process && !this.process.killed) {
this.process.kill("SIGTERM");
await new Promise<void>((resolve) => {
const timer = setTimeout(() => {
if (this.process && !this.process.killed) {
this.process.kill("SIGKILL");
}
resolve();
}, 2_000);
this.process!.on("exit", () => {
clearTimeout(timer);
resolve();
});
});
}
// Clean up socket
try {
const { unlinkSync } = await import("node:fs");
unlinkSync(config.socketPath);
} catch {
// Already gone
}
// Clean up tap device
await killFirecracker(this.process, config.socketPath);
deleteTap(config.tapDevice);
// Release IP (skip for snapshot runs which don't allocate from pool)
if (this.octet > 0) {
releaseIp(this.octet);
}
// Delete rootfs copy (skip for snapshot runs which share rootfs)
if (config.rootfsPath) {
deleteRunCopy(config.rootfsPath);
}