feat: add IRC 512-byte message truncation
Split outgoing messages at UTF-8 safe boundaries to comply with the RFC 2812 line limit. Accounts for PRIVMSG overhead and CRLF.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -6,21 +6,42 @@ import asyncio
|
||||
import base64
|
||||
import fnmatch
|
||||
import logging
|
||||
import random
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from derp import __version__
|
||||
from derp.irc import IRCConnection, Message, format_msg, parse
|
||||
from derp.irc import _MAX_IRC_LINE, IRCConnection, Message, format_msg, parse
|
||||
from derp.plugin import Handler, PluginRegistry
|
||||
from derp.state import StateStore
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
RECONNECT_DELAY = 30
|
||||
_AMBIGUOUS = object() # sentinel for ambiguous prefix matches
|
||||
|
||||
|
||||
def _split_utf8(text: str, max_bytes: int) -> list[str]:
|
||||
"""Split text into chunks that fit within max_bytes when UTF-8 encoded."""
|
||||
encoded = text.encode("utf-8")
|
||||
if len(encoded) <= max_bytes:
|
||||
return [text]
|
||||
chunks: list[str] = []
|
||||
while encoded:
|
||||
if len(encoded) <= max_bytes:
|
||||
chunks.append(encoded.decode("utf-8"))
|
||||
break
|
||||
cut = max_bytes
|
||||
# Walk backward to avoid breaking a multi-byte codepoint
|
||||
while cut > 0 and (encoded[cut] & 0xC0) == 0x80:
|
||||
cut -= 1
|
||||
if cut == 0:
|
||||
cut = max_bytes # degenerate: force split
|
||||
chunks.append(encoded[:cut].decode("utf-8"))
|
||||
encoded = encoded[cut:]
|
||||
return chunks
|
||||
|
||||
|
||||
class _TokenBucket:
|
||||
"""Token bucket rate limiter for outgoing messages."""
|
||||
|
||||
@@ -62,6 +83,7 @@ class Bot:
|
||||
self._running = False
|
||||
self._started: float = time.monotonic()
|
||||
self._tasks: set[asyncio.Task] = set()
|
||||
self._reconnect_delay: float = 5.0
|
||||
self._admins: list[str] = config.get("bot", {}).get("admins", [])
|
||||
self._opers: set[str] = set() # hostmasks of known IRC operators
|
||||
self._caps: set[str] = set() # negotiated IRCv3 caps
|
||||
@@ -74,16 +96,24 @@ class Bot:
|
||||
)
|
||||
|
||||
async def start(self) -> None:
|
||||
"""Connect, register, join channels, and enter the main loop."""
|
||||
"""Connect, register, join channels, and enter the main loop.
|
||||
|
||||
Uses exponential backoff with jitter on reconnect (5s -> 300s cap).
|
||||
Delay resets after a successful connection.
|
||||
"""
|
||||
self._running = True
|
||||
while self._running:
|
||||
try:
|
||||
await self._connect_and_run()
|
||||
self._reconnect_delay = 5.0 # reset on clean run
|
||||
except (OSError, ConnectionError) as exc:
|
||||
log.error("connection lost: %s", exc)
|
||||
if self._running:
|
||||
log.info("reconnecting in %ds...", RECONNECT_DELAY)
|
||||
await asyncio.sleep(RECONNECT_DELAY)
|
||||
jitter = self._reconnect_delay * 0.25 * (2 * random.random() - 1)
|
||||
delay = self._reconnect_delay + jitter
|
||||
log.info("reconnecting in %.0fs...", delay)
|
||||
await asyncio.sleep(delay)
|
||||
self._reconnect_delay = min(self._reconnect_delay * 2, 300.0)
|
||||
|
||||
async def _connect_and_run(self) -> None:
|
||||
"""Single connection lifecycle."""
|
||||
@@ -358,10 +388,17 @@ class Bot:
|
||||
# -- Public API for plugins --
|
||||
|
||||
async def send(self, target: str, text: str) -> None:
|
||||
"""Send a PRIVMSG to a target (channel or nick), rate-limited."""
|
||||
"""Send a PRIVMSG to a target (channel or nick), rate-limited.
|
||||
|
||||
Long lines are split at UTF-8 safe boundaries to stay within
|
||||
the IRC 512-byte line limit (RFC 2812).
|
||||
"""
|
||||
overhead = len(f"PRIVMSG {target} :".encode("utf-8")) + 2 # +CRLF
|
||||
max_text = _MAX_IRC_LINE - overhead
|
||||
for line in text.split("\n"):
|
||||
await self._bucket.acquire()
|
||||
await self.conn.send(format_msg("PRIVMSG", target, line))
|
||||
for chunk in _split_utf8(line, max_text):
|
||||
await self._bucket.acquire()
|
||||
await self.conn.send(format_msg("PRIVMSG", target, chunk))
|
||||
|
||||
async def reply(self, msg: Message, text: str) -> None:
|
||||
"""Reply to the source of a message (channel or PM)."""
|
||||
|
||||
@@ -9,6 +9,8 @@ from dataclasses import dataclass
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
_MAX_IRC_LINE = 512 # bytes including \r\n (RFC 2812)
|
||||
|
||||
|
||||
def _unescape_tag_value(value: str) -> str:
|
||||
"""Unescape an IRCv3 message tag value per the spec."""
|
||||
|
||||
Reference in New Issue
Block a user