Add thread safety and agent health checks
- IRC socket writes protected by threading.Lock in agent.py - Overseer runs periodic health check (30s interval) - Detects dead agent processes, cleans up resources, announces in #agents Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -118,6 +118,7 @@ class IRCClient:
|
||||
self.nick = nick
|
||||
self.sock = None
|
||||
self.buf = ""
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def connect(self):
|
||||
self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
@@ -127,7 +128,8 @@ class IRCClient:
|
||||
self.send(f"USER {self.nick} 0 * :Fireclaw Agent")
|
||||
|
||||
def send(self, msg):
|
||||
self.sock.sendall(f"{msg}\r\n".encode("utf-8"))
|
||||
with self._lock:
|
||||
self.sock.sendall(f"{msg}\r\n".encode("utf-8"))
|
||||
|
||||
def join(self, channel):
|
||||
self.send(f"JOIN {channel}")
|
||||
|
||||
@@ -40,6 +40,7 @@ export async function runOverseer(config: OverseerConfig) {
|
||||
log(`Cleaned ${cleaned.length} dead agent(s): ${cleaned.join(", ")}`);
|
||||
}
|
||||
|
||||
let knownAgents = new Set(listAgents().map((a) => a.name));
|
||||
const bot = new IRC.Client();
|
||||
|
||||
bot.connect({
|
||||
@@ -76,6 +77,7 @@ export async function runOverseer(config: OverseerConfig) {
|
||||
const name = parts[2];
|
||||
bot.say(event.target, `Invoking agent "${name ?? template}" from template "${template}"...`);
|
||||
const info = await startAgent(template, { name });
|
||||
knownAgents.add(info.name);
|
||||
bot.say(
|
||||
event.target,
|
||||
`Agent "${info.name}" started: ${info.nick} [${info.model}] (${info.ip})`
|
||||
@@ -90,6 +92,7 @@ export async function runOverseer(config: OverseerConfig) {
|
||||
return;
|
||||
}
|
||||
await stopAgent(name);
|
||||
knownAgents.delete(name);
|
||||
bot.say(event.target, `Agent "${name}" destroyed.`);
|
||||
break;
|
||||
}
|
||||
@@ -184,5 +187,23 @@ export async function runOverseer(config: OverseerConfig) {
|
||||
process.on("SIGINT", shutdown);
|
||||
process.on("SIGTERM", shutdown);
|
||||
|
||||
// Periodic health check — detect and clean up dead agents
|
||||
const HEALTH_CHECK_INTERVAL = 30_000;
|
||||
const healthCheck = () => {
|
||||
const agents = listAgents(); // listAgents already cleans dead entries
|
||||
// Check if any agents were cleaned (listAgents logs and removes dead ones)
|
||||
// We just need to report the ones that disappeared
|
||||
const currentNames = new Set(agents.map((a) => a.name));
|
||||
for (const name of knownAgents) {
|
||||
if (!currentNames.has(name)) {
|
||||
log(`Agent "${name}" died, cleaned up.`);
|
||||
bot.say("#agents", `Agent "${name}" has died and been cleaned up.`);
|
||||
}
|
||||
}
|
||||
knownAgents = currentNames;
|
||||
};
|
||||
|
||||
setInterval(healthCheck, HEALTH_CHECK_INTERVAL);
|
||||
|
||||
log("Overseer started. Waiting for commands...");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user