Add thread safety and agent health checks
- IRC socket writes protected by threading.Lock in agent.py - Overseer runs periodic health check (30s interval) - Detects dead agent processes, cleans up resources, announces in #agents Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -118,6 +118,7 @@ class IRCClient:
|
|||||||
self.nick = nick
|
self.nick = nick
|
||||||
self.sock = None
|
self.sock = None
|
||||||
self.buf = ""
|
self.buf = ""
|
||||||
|
self._lock = threading.Lock()
|
||||||
|
|
||||||
def connect(self):
|
def connect(self):
|
||||||
self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||||
@@ -127,7 +128,8 @@ class IRCClient:
|
|||||||
self.send(f"USER {self.nick} 0 * :Fireclaw Agent")
|
self.send(f"USER {self.nick} 0 * :Fireclaw Agent")
|
||||||
|
|
||||||
def send(self, msg):
|
def send(self, msg):
|
||||||
self.sock.sendall(f"{msg}\r\n".encode("utf-8"))
|
with self._lock:
|
||||||
|
self.sock.sendall(f"{msg}\r\n".encode("utf-8"))
|
||||||
|
|
||||||
def join(self, channel):
|
def join(self, channel):
|
||||||
self.send(f"JOIN {channel}")
|
self.send(f"JOIN {channel}")
|
||||||
|
|||||||
@@ -40,6 +40,7 @@ export async function runOverseer(config: OverseerConfig) {
|
|||||||
log(`Cleaned ${cleaned.length} dead agent(s): ${cleaned.join(", ")}`);
|
log(`Cleaned ${cleaned.length} dead agent(s): ${cleaned.join(", ")}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let knownAgents = new Set(listAgents().map((a) => a.name));
|
||||||
const bot = new IRC.Client();
|
const bot = new IRC.Client();
|
||||||
|
|
||||||
bot.connect({
|
bot.connect({
|
||||||
@@ -76,6 +77,7 @@ export async function runOverseer(config: OverseerConfig) {
|
|||||||
const name = parts[2];
|
const name = parts[2];
|
||||||
bot.say(event.target, `Invoking agent "${name ?? template}" from template "${template}"...`);
|
bot.say(event.target, `Invoking agent "${name ?? template}" from template "${template}"...`);
|
||||||
const info = await startAgent(template, { name });
|
const info = await startAgent(template, { name });
|
||||||
|
knownAgents.add(info.name);
|
||||||
bot.say(
|
bot.say(
|
||||||
event.target,
|
event.target,
|
||||||
`Agent "${info.name}" started: ${info.nick} [${info.model}] (${info.ip})`
|
`Agent "${info.name}" started: ${info.nick} [${info.model}] (${info.ip})`
|
||||||
@@ -90,6 +92,7 @@ export async function runOverseer(config: OverseerConfig) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
await stopAgent(name);
|
await stopAgent(name);
|
||||||
|
knownAgents.delete(name);
|
||||||
bot.say(event.target, `Agent "${name}" destroyed.`);
|
bot.say(event.target, `Agent "${name}" destroyed.`);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -184,5 +187,23 @@ export async function runOverseer(config: OverseerConfig) {
|
|||||||
process.on("SIGINT", shutdown);
|
process.on("SIGINT", shutdown);
|
||||||
process.on("SIGTERM", shutdown);
|
process.on("SIGTERM", shutdown);
|
||||||
|
|
||||||
|
// Periodic health check — detect and clean up dead agents
|
||||||
|
const HEALTH_CHECK_INTERVAL = 30_000;
|
||||||
|
const healthCheck = () => {
|
||||||
|
const agents = listAgents(); // listAgents already cleans dead entries
|
||||||
|
// Check if any agents were cleaned (listAgents logs and removes dead ones)
|
||||||
|
// We just need to report the ones that disappeared
|
||||||
|
const currentNames = new Set(agents.map((a) => a.name));
|
||||||
|
for (const name of knownAgents) {
|
||||||
|
if (!currentNames.has(name)) {
|
||||||
|
log(`Agent "${name}" died, cleaned up.`);
|
||||||
|
bot.say("#agents", `Agent "${name}" has died and been cleaned up.`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
knownAgents = currentNames;
|
||||||
|
};
|
||||||
|
|
||||||
|
setInterval(healthCheck, HEALTH_CHECK_INTERVAL);
|
||||||
|
|
||||||
log("Overseer started. Waiting for commands...");
|
log("Overseer started. Waiting for commands...");
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user