diff --git a/src/derp/bot.py b/src/derp/bot.py index 780fe1b..d8e6641 100644 --- a/src/derp/bot.py +++ b/src/derp/bot.py @@ -6,21 +6,42 @@ import asyncio import base64 import fnmatch import logging +import random import time from datetime import datetime, timezone from pathlib import Path from derp import __version__ -from derp.irc import IRCConnection, Message, format_msg, parse +from derp.irc import _MAX_IRC_LINE, IRCConnection, Message, format_msg, parse from derp.plugin import Handler, PluginRegistry from derp.state import StateStore log = logging.getLogger(__name__) -RECONNECT_DELAY = 30 _AMBIGUOUS = object() # sentinel for ambiguous prefix matches +def _split_utf8(text: str, max_bytes: int) -> list[str]: + """Split text into chunks that fit within max_bytes when UTF-8 encoded.""" + encoded = text.encode("utf-8") + if len(encoded) <= max_bytes: + return [text] + chunks: list[str] = [] + while encoded: + if len(encoded) <= max_bytes: + chunks.append(encoded.decode("utf-8")) + break + cut = max_bytes + # Walk backward to avoid breaking a multi-byte codepoint + while cut > 0 and (encoded[cut] & 0xC0) == 0x80: + cut -= 1 + if cut == 0: + cut = max_bytes # degenerate: force split + chunks.append(encoded[:cut].decode("utf-8")) + encoded = encoded[cut:] + return chunks + + class _TokenBucket: """Token bucket rate limiter for outgoing messages.""" @@ -62,6 +83,7 @@ class Bot: self._running = False self._started: float = time.monotonic() self._tasks: set[asyncio.Task] = set() + self._reconnect_delay: float = 5.0 self._admins: list[str] = config.get("bot", {}).get("admins", []) self._opers: set[str] = set() # hostmasks of known IRC operators self._caps: set[str] = set() # negotiated IRCv3 caps @@ -74,16 +96,24 @@ class Bot: ) async def start(self) -> None: - """Connect, register, join channels, and enter the main loop.""" + """Connect, register, join channels, and enter the main loop. + + Uses exponential backoff with jitter on reconnect (5s -> 300s cap). 
async def start(self) -> None:
    """Connect, register, join channels, and enter the main loop.

    Runs ``_connect_and_run`` repeatedly while ``self._running`` is set.
    When a connection fails with ``OSError``/``ConnectionError`` the loop
    sleeps for ``self._reconnect_delay`` seconds with +/-25% random jitter,
    then doubles the base delay up to a 300 second cap (the jitter is
    applied on top of the capped value).

    The base delay is reset to 5 seconds only when ``_connect_and_run``
    returns without raising.
    """
    # NOTE(review): a connection that was established and later dropped with
    # an exception does not reset the delay -- confirm this is intended.
    self._running = True
    while self._running:
        try:
            await self._connect_and_run()
            self._reconnect_delay = 5.0  # reset on clean run
        except (OSError, ConnectionError) as exc:
            log.error("connection lost: %s", exc)
            if self._running:
                # Uniform jitter in [-25%, +25%) of the current base delay.
                jitter = self._reconnect_delay * 0.25 * (2 * random.random() - 1)
                delay = self._reconnect_delay + jitter
                log.info("reconnecting in %.0fs...", delay)
                await asyncio.sleep(delay)
                self._reconnect_delay = min(self._reconnect_delay * 2, 300.0)


async def send(self, target: str, text: str) -> None:
    """Send a PRIVMSG to a target (channel or nick), rate-limited.

    The text is split on newlines, and each line is further split at UTF-8
    safe boundaries so that ``PRIVMSG <target> :<chunk>`` plus CRLF stays
    within the IRC 512-byte line limit (RFC 2812). One rate-limiter token
    is acquired per chunk sent.

    If the target is so long that fewer than 4 bytes remain for message
    text, nothing is sent and an error is logged.

    Args:
        target: Channel or nick to send to.
        text: Message text; may contain newlines.
    """
    overhead = len(f"PRIVMSG {target} :".encode("utf-8")) + 2  # +CRLF
    max_text = _MAX_IRC_LINE - overhead
    # A UTF-8 codepoint is at most 4 bytes. With less room than that the
    # splitter cannot cut on codepoint boundaries, and a budget <= 0 would
    # make it loop forever and stall the event loop.
    if max_text < 4:
        log.error(
            "cannot send to %r: target leaves no room for message text",
            target,
        )
        return
    # NOTE(review): the budget does not account for any prefix the server
    # may prepend when relaying the message -- confirm whether it should.
    for line in text.split("\n"):
        for chunk in _split_utf8(line, max_text):
            await self._bucket.acquire()
            await self.conn.send(format_msg("PRIVMSG", target, chunk))