fix: track reconnect task for clean shutdown

Reconnect backoff sleeps (up to 300s) were not cancellable, causing
SIGKILL on container stop. Now _schedule_reconnect spawns a tracked
task that stop() cancels, enabling graceful shutdown within the
podman timeout.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
user
2026-02-20 20:16:57 +01:00
parent 54218d2677
commit 532ceb3c3d

View File

@@ -156,6 +156,7 @@ class Network:
self._reconnect_attempt: int = 0 self._reconnect_attempt: int = 0
self._running: bool = False self._running: bool = False
self._read_task: asyncio.Task[None] | None = None self._read_task: asyncio.Task[None] | None = None
self._reconnect_task: asyncio.Task[None] | None = None
self._probation_task: asyncio.Task[None] | None = None self._probation_task: asyncio.Task[None] | None = None
# Transient nick used during registration/probation # Transient nick used during registration/probation
self._connect_nick: str = "" self._connect_nick: str = ""
@@ -187,10 +188,9 @@ class Network:
async def stop(self) -> None: async def stop(self) -> None:
"""Disconnect and stop reconnection.""" """Disconnect and stop reconnection."""
self._running = False self._running = False
if self._read_task and not self._read_task.done(): for task in (self._read_task, self._reconnect_task, self._probation_task):
self._read_task.cancel() if task and not task.done():
if self._probation_task and not self._probation_task.done(): task.cancel()
self._probation_task.cancel()
await self._disconnect() await self._disconnect()
async def send(self, msg: IRCMessage) -> None: async def send(self, msg: IRCMessage) -> None:
@@ -242,7 +242,7 @@ class Network:
log.exception("[%s] connection failed", self.cfg.name) log.exception("[%s] connection failed", self.cfg.name)
self.state = State.DISCONNECTED self.state = State.DISCONNECTED
if self._running: if self._running:
await self._schedule_reconnect() self._schedule_reconnect()
async def _disconnect(self) -> None: async def _disconnect(self) -> None:
"""Close the connection.""" """Close the connection."""
@@ -259,7 +259,11 @@ class Network:
self._reader = None self._reader = None
self._writer = None self._writer = None
async def _schedule_reconnect(self) -> None: def _schedule_reconnect(self) -> None:
"""Schedule a reconnect after exponential backoff."""
self._reconnect_task = asyncio.create_task(self._reconnect_wait())
async def _reconnect_wait(self) -> None:
"""Wait with exponential backoff, then reconnect.""" """Wait with exponential backoff, then reconnect."""
delay = BACKOFF_STEPS[min(self._reconnect_attempt, len(BACKOFF_STEPS) - 1)] delay = BACKOFF_STEPS[min(self._reconnect_attempt, len(BACKOFF_STEPS) - 1)]
self._reconnect_attempt += 1 self._reconnect_attempt += 1
@@ -267,7 +271,10 @@ class Network:
"[%s] reconnecting in %ds (attempt %d)", "[%s] reconnecting in %ds (attempt %d)",
self.cfg.name, delay, self._reconnect_attempt, self.cfg.name, delay, self._reconnect_attempt,
) )
await asyncio.sleep(delay) try:
await asyncio.sleep(delay)
except asyncio.CancelledError:
return
if self._running: if self._running:
await self._connect() await self._connect()
@@ -300,7 +307,7 @@ class Network:
finally: finally:
await self._disconnect() await self._disconnect()
if self._running: if self._running:
await self._schedule_reconnect() self._schedule_reconnect()
async def _enter_probation(self) -> None: async def _enter_probation(self) -> None:
"""Start probation period after registration. Survive = ready.""" """Start probation period after registration. Survive = ready."""