From 6fc6e899171ff545913951a3dcda7280754b916d Mon Sep 17 00:00:00 2001 From: ansible Date: Tue, 7 Apr 2026 13:41:50 +0000 Subject: [PATCH] Add thread safety and agent health checks - IRC socket writes protected by threading.Lock in agent.py - Overseer runs periodic health check (30s interval) - Detects dead agent processes, cleans up resources, announces in #agents Co-Authored-By: Claude Opus 4.6 (1M context) --- agent/agent.py | 4 +++- src/overseer.ts | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/agent/agent.py b/agent/agent.py index ec7a334..106bbd4 100644 --- a/agent/agent.py +++ b/agent/agent.py @@ -118,6 +118,7 @@ class IRCClient: self.nick = nick self.sock = None self.buf = "" + self._lock = threading.Lock() def connect(self): self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) @@ -127,7 +128,8 @@ class IRCClient: self.send(f"USER {self.nick} 0 * :Fireclaw Agent") def send(self, msg): - self.sock.sendall(f"{msg}\r\n".encode("utf-8")) + with self._lock: + self.sock.sendall(f"{msg}\r\n".encode("utf-8")) def join(self, channel): self.send(f"JOIN {channel}") diff --git a/src/overseer.ts b/src/overseer.ts index 345d5ca..dbefca8 100644 --- a/src/overseer.ts +++ b/src/overseer.ts @@ -40,6 +40,7 @@ export async function runOverseer(config: OverseerConfig) { log(`Cleaned ${cleaned.length} dead agent(s): ${cleaned.join(", ")}`); } + let knownAgents = new Set(listAgents().map((a) => a.name)); const bot = new IRC.Client(); bot.connect({ @@ -76,6 +77,7 @@ export async function runOverseer(config: OverseerConfig) { const name = parts[2]; bot.say(event.target, `Invoking agent "${name ?? template}" from template "${template}"...`); const info = await startAgent(template, { name }); + knownAgents.add(info.name); bot.say( event.target, `Agent "${info.name}" started: ${info.nick} [${info.model}] (${info.ip})` @@ -90,6 +92,7 @@ export async function runOverseer(config: OverseerConfig) { return; } await stopAgent(name); + knownAgents.delete(name); bot.say(event.target, `Agent "${name}" destroyed.`); break; } @@ -184,5 +187,23 @@ export async function runOverseer(config: OverseerConfig) { process.on("SIGINT", shutdown); process.on("SIGTERM", shutdown); + // Periodic health check — detect and clean up dead agents + const HEALTH_CHECK_INTERVAL = 30_000; + const healthCheck = () => { + const agents = listAgents(); // listAgents already cleans dead entries + // Check if any agents were cleaned (listAgents logs and removes dead ones) + // We just need to report the ones that disappeared + const currentNames = new Set(agents.map((a) => a.name)); + for (const name of knownAgents) { + if (!currentNames.has(name)) { + log(`Agent "${name}" died, cleaned up.`); + bot.say("#agents", `Agent "${name}" has died and been cleaned up.`); + } + } + knownAgents = currentNames; + }; + + setInterval(healthCheck, HEALTH_CHECK_INTERVAL); + log("Overseer started. Waiting for commands..."); }