feat: add IRC 512-byte message truncation

Split outgoing messages at UTF-8 safe boundaries to comply with
RFC 2812 line limit. Accounts for PRIVMSG overhead and CRLF.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
user
2026-02-15 03:26:57 +01:00
parent f1d4975a4a
commit 1a6c6de38b
2 changed files with 47 additions and 8 deletions

View File

@@ -6,21 +6,42 @@ import asyncio
import base64 import base64
import fnmatch import fnmatch
import logging import logging
import random
import time import time
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from derp import __version__ from derp import __version__
from derp.irc import IRCConnection, Message, format_msg, parse from derp.irc import _MAX_IRC_LINE, IRCConnection, Message, format_msg, parse
from derp.plugin import Handler, PluginRegistry from derp.plugin import Handler, PluginRegistry
from derp.state import StateStore from derp.state import StateStore
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
RECONNECT_DELAY = 30
_AMBIGUOUS = object() # sentinel for ambiguous prefix matches _AMBIGUOUS = object() # sentinel for ambiguous prefix matches
def _split_utf8(text: str, max_bytes: int) -> list[str]:
"""Split text into chunks that fit within max_bytes when UTF-8 encoded."""
encoded = text.encode("utf-8")
if len(encoded) <= max_bytes:
return [text]
chunks: list[str] = []
while encoded:
if len(encoded) <= max_bytes:
chunks.append(encoded.decode("utf-8"))
break
cut = max_bytes
# Walk backward to avoid breaking a multi-byte codepoint
while cut > 0 and (encoded[cut] & 0xC0) == 0x80:
cut -= 1
if cut == 0:
cut = max_bytes # degenerate: force split
chunks.append(encoded[:cut].decode("utf-8"))
encoded = encoded[cut:]
return chunks
class _TokenBucket: class _TokenBucket:
"""Token bucket rate limiter for outgoing messages.""" """Token bucket rate limiter for outgoing messages."""
@@ -62,6 +83,7 @@ class Bot:
self._running = False self._running = False
self._started: float = time.monotonic() self._started: float = time.monotonic()
self._tasks: set[asyncio.Task] = set() self._tasks: set[asyncio.Task] = set()
self._reconnect_delay: float = 5.0
self._admins: list[str] = config.get("bot", {}).get("admins", []) self._admins: list[str] = config.get("bot", {}).get("admins", [])
self._opers: set[str] = set() # hostmasks of known IRC operators self._opers: set[str] = set() # hostmasks of known IRC operators
self._caps: set[str] = set() # negotiated IRCv3 caps self._caps: set[str] = set() # negotiated IRCv3 caps
@@ -74,16 +96,24 @@ class Bot:
) )
async def start(self) -> None: async def start(self) -> None:
"""Connect, register, join channels, and enter the main loop.""" """Connect, register, join channels, and enter the main loop.
Uses exponential backoff with jitter on reconnect (5s -> 300s cap).
Delay resets after a successful connection.
"""
self._running = True self._running = True
while self._running: while self._running:
try: try:
await self._connect_and_run() await self._connect_and_run()
self._reconnect_delay = 5.0 # reset on clean run
except (OSError, ConnectionError) as exc: except (OSError, ConnectionError) as exc:
log.error("connection lost: %s", exc) log.error("connection lost: %s", exc)
if self._running: if self._running:
log.info("reconnecting in %ds...", RECONNECT_DELAY) jitter = self._reconnect_delay * 0.25 * (2 * random.random() - 1)
await asyncio.sleep(RECONNECT_DELAY) delay = self._reconnect_delay + jitter
log.info("reconnecting in %.0fs...", delay)
await asyncio.sleep(delay)
self._reconnect_delay = min(self._reconnect_delay * 2, 300.0)
async def _connect_and_run(self) -> None: async def _connect_and_run(self) -> None:
"""Single connection lifecycle.""" """Single connection lifecycle."""
@@ -358,10 +388,17 @@ class Bot:
# -- Public API for plugins -- # -- Public API for plugins --
async def send(self, target: str, text: str) -> None: async def send(self, target: str, text: str) -> None:
"""Send a PRIVMSG to a target (channel or nick), rate-limited.""" """Send a PRIVMSG to a target (channel or nick), rate-limited.
Long lines are split at UTF-8 safe boundaries to stay within
the IRC 512-byte line limit (RFC 2812).
"""
overhead = len(f"PRIVMSG {target} :".encode("utf-8")) + 2 # +CRLF
max_text = _MAX_IRC_LINE - overhead
for line in text.split("\n"): for line in text.split("\n"):
await self._bucket.acquire() for chunk in _split_utf8(line, max_text):
await self.conn.send(format_msg("PRIVMSG", target, line)) await self._bucket.acquire()
await self.conn.send(format_msg("PRIVMSG", target, chunk))
async def reply(self, msg: Message, text: str) -> None: async def reply(self, msg: Message, text: str) -> None:
"""Reply to the source of a message (channel or PM).""" """Reply to the source of a message (channel or PM)."""

View File

@@ -9,6 +9,8 @@ from dataclasses import dataclass
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
_MAX_IRC_LINE = 512 # bytes including \r\n (RFC 2812)
def _unescape_tag_value(value: str) -> str: def _unescape_tag_value(value: str) -> str:
"""Unescape an IRCv3 message tag value per the spec.""" """Unescape an IRCv3 message tag value per the spec."""