feat: split outgoing IRC messages to fit the 512-byte line limit

Split outgoing messages at UTF-8 safe boundaries to comply with the
RFC 2812 512-byte line limit. Accounts for PRIVMSG overhead and CRLF.
Also replaces the fixed reconnect delay with exponential backoff and jitter.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
user
2026-02-15 03:26:57 +01:00
parent f1d4975a4a
commit 1a6c6de38b
2 changed files with 47 additions and 8 deletions

View File

@@ -6,21 +6,42 @@ import asyncio
import base64
import fnmatch
import logging
import random
import time
from datetime import datetime, timezone
from pathlib import Path
from derp import __version__
from derp.irc import IRCConnection, Message, format_msg, parse
from derp.irc import _MAX_IRC_LINE, IRCConnection, Message, format_msg, parse
from derp.plugin import Handler, PluginRegistry
from derp.state import StateStore
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Fixed reconnect delay, in seconds.
RECONNECT_DELAY = 30
_AMBIGUOUS = object() # sentinel for ambiguous prefix matches
def _split_utf8(text: str, max_bytes: int) -> list[str]:
"""Split text into chunks that fit within max_bytes when UTF-8 encoded."""
encoded = text.encode("utf-8")
if len(encoded) <= max_bytes:
return [text]
chunks: list[str] = []
while encoded:
if len(encoded) <= max_bytes:
chunks.append(encoded.decode("utf-8"))
break
cut = max_bytes
# Walk backward to avoid breaking a multi-byte codepoint
while cut > 0 and (encoded[cut] & 0xC0) == 0x80:
cut -= 1
if cut == 0:
cut = max_bytes # degenerate: force split
chunks.append(encoded[:cut].decode("utf-8"))
encoded = encoded[cut:]
return chunks
class _TokenBucket:
"""Token bucket rate limiter for outgoing messages."""
@@ -62,6 +83,7 @@ class Bot:
self._running = False
self._started: float = time.monotonic()
self._tasks: set[asyncio.Task] = set()
self._reconnect_delay: float = 5.0
self._admins: list[str] = config.get("bot", {}).get("admins", [])
self._opers: set[str] = set() # hostmasks of known IRC operators
self._caps: set[str] = set() # negotiated IRCv3 caps
@@ -74,16 +96,24 @@ class Bot:
)
async def start(self) -> None:
    """Connect, register, join channels, and enter the main loop.

    Uses exponential backoff with jitter on reconnect (5s -> 300s cap).
    The delay resets to 5s after a connection run that ends without
    raising a connection error.
    """
    self._running = True
    while self._running:
        try:
            await self._connect_and_run()
            self._reconnect_delay = 5.0  # reset on clean run
        except (OSError, ConnectionError) as exc:
            log.error("connection lost: %s", exc)
        if self._running:
            # Spread the wait by +/-25% so many clients dropped at the
            # same moment do not all reconnect in lockstep.
            jitter = self._reconnect_delay * 0.25 * (2 * random.random() - 1)
            delay = self._reconnect_delay + jitter
            log.info("reconnecting in %.0fs...", delay)
            await asyncio.sleep(delay)
            # Double the base delay for the next attempt, capped at 300s.
            self._reconnect_delay = min(self._reconnect_delay * 2, 300.0)
async def _connect_and_run(self) -> None:
"""Single connection lifecycle."""
@@ -358,10 +388,17 @@ class Bot:
# -- Public API for plugins --
async def send(self, target: str, text: str) -> None:
    """Send a PRIVMSG to a target (channel or nick), rate-limited.

    The text is split on newlines, and each line is further split at
    UTF-8 safe boundaries to stay within the IRC 512-byte line limit
    (RFC 2812). One rate-limit token is acquired per message sent.

    Args:
        target: Channel or nick to send to.
        text: Message text; may contain several newline-separated lines.

    Raises:
        ValueError: If ``target`` is so long that no text byte fits on
            a line.
    """
    overhead = len(f"PRIVMSG {target} :".encode("utf-8")) + 2  # +CRLF
    max_text = _MAX_IRC_LINE - overhead
    if max_text < 1:
        # No room for any payload; splitting could never make progress.
        raise ValueError(f"target too long for an IRC line: {target!r}")
    # NOTE(review): this budget covers only the line as sent by us. A
    # server typically prepends a ":nick!user@host " prefix when relaying,
    # which may push near-limit lines over 512 bytes -- confirm whether
    # extra headroom is needed.
    for line in text.split("\n"):
        for chunk in _split_utf8(line, max_text):
            await self._bucket.acquire()
            await self.conn.send(format_msg("PRIVMSG", target, chunk))
async def reply(self, msg: Message, text: str) -> None:
"""Reply to the source of a message (channel or PM)."""

View File

@@ -9,6 +9,8 @@ from dataclasses import dataclass
log = logging.getLogger(__name__)
_MAX_IRC_LINE = 512 # bytes including \r\n (RFC 2812)
def _unescape_tag_value(value: str) -> str:
"""Unescape an IRCv3 message tag value per the spec."""