Add thread safety and agent health checks

- IRC socket writes protected by threading.Lock in agent.py
- Overseer runs periodic health check (30s interval)
- Detects dead agent processes, cleans up resources, announces in #agents

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-07 13:41:50 +00:00
parent ff694d12f6
commit 6fc6e89917
2 changed files with 24 additions and 1 deletions

View File

@@ -118,6 +118,7 @@ class IRCClient:
self.nick = nick
self.sock = None
self.buf = ""
self._lock = threading.Lock()
def connect(self):
self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -127,7 +128,8 @@ class IRCClient:
self.send(f"USER {self.nick} 0 * :Fireclaw Agent")
def send(self, msg):
self.sock.sendall(f"{msg}\r\n".encode("utf-8"))
with self._lock:
self.sock.sendall(f"{msg}\r\n".encode("utf-8"))
def join(self, channel):
self.send(f"JOIN {channel}")

View File

@@ -40,6 +40,7 @@ export async function runOverseer(config: OverseerConfig) {
log(`Cleaned ${cleaned.length} dead agent(s): ${cleaned.join(", ")}`);
}
let knownAgents = new Set(listAgents().map((a) => a.name));
const bot = new IRC.Client();
bot.connect({
@@ -76,6 +77,7 @@ export async function runOverseer(config: OverseerConfig) {
const name = parts[2];
bot.say(event.target, `Invoking agent "${name ?? template}" from template "${template}"...`);
const info = await startAgent(template, { name });
knownAgents.add(info.name);
bot.say(
event.target,
`Agent "${info.name}" started: ${info.nick} [${info.model}] (${info.ip})`
@@ -90,6 +92,7 @@ export async function runOverseer(config: OverseerConfig) {
return;
}
await stopAgent(name);
knownAgents.delete(name);
bot.say(event.target, `Agent "${name}" destroyed.`);
break;
}
@@ -184,5 +187,23 @@ export async function runOverseer(config: OverseerConfig) {
process.on("SIGINT", shutdown);
process.on("SIGTERM", shutdown);
// Periodic health check — detect and clean up dead agents
const HEALTH_CHECK_INTERVAL = 30_000;
const healthCheck = () => {
const agents = listAgents(); // listAgents already cleans dead entries
// Check if any agents were cleaned (listAgents logs and removes dead ones)
// We just need to report the ones that disappeared
const currentNames = new Set(agents.map((a) => a.name));
for (const name of knownAgents) {
if (!currentNames.has(name)) {
log(`Agent "${name}" died, cleaned up.`);
bot.say("#agents", `Agent "${name}" has died and been cleaned up.`);
}
}
knownAgents = currentNames;
};
setInterval(healthCheck, HEALTH_CHECK_INTERVAL);
log("Overseer started. Waiting for commands...");
}