feat: add IRC 512-byte message truncation
Split outgoing messages at UTF-8 safe boundaries to comply with RFC 2812 line limit. Accounts for PRIVMSG overhead and CRLF. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -6,21 +6,42 @@ import asyncio
|
|||||||
import base64
|
import base64
|
||||||
import fnmatch
|
import fnmatch
|
||||||
import logging
|
import logging
|
||||||
|
import random
|
||||||
import time
|
import time
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from derp import __version__
|
from derp import __version__
|
||||||
from derp.irc import IRCConnection, Message, format_msg, parse
|
from derp.irc import _MAX_IRC_LINE, IRCConnection, Message, format_msg, parse
|
||||||
from derp.plugin import Handler, PluginRegistry
|
from derp.plugin import Handler, PluginRegistry
|
||||||
from derp.state import StateStore
|
from derp.state import StateStore
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
RECONNECT_DELAY = 30
|
|
||||||
_AMBIGUOUS = object() # sentinel for ambiguous prefix matches
|
_AMBIGUOUS = object() # sentinel for ambiguous prefix matches
|
||||||
|
|
||||||
|
|
||||||
|
def _split_utf8(text: str, max_bytes: int) -> list[str]:
    """Split text into chunks that fit within max_bytes when UTF-8 encoded.

    Chunks are cut only on codepoint boundaries, so every chunk is valid
    UTF-8 on its own and ``"".join(chunks) == text``.

    Args:
        text: The string to split.
        max_bytes: Maximum encoded size of each chunk, in bytes.

    Returns:
        ``[text]`` unchanged when it already fits; otherwise the ordered
        list of chunks. If a single codepoint is wider than ``max_bytes``
        (only possible for ``max_bytes < 4``), that codepoint is emitted
        whole as its own oversized chunk rather than being cut in half.

    Raises:
        ValueError: If ``text`` does not fit and ``max_bytes`` is not
            positive, since no progress could ever be made.
    """
    encoded = text.encode("utf-8")
    total = len(encoded)
    if total <= max_bytes:
        return [text]
    if max_bytes < 1:
        # Without this guard the loop below would append empty chunks
        # forever because the cut position could never advance.
        raise ValueError(f"max_bytes must be positive, got {max_bytes}")

    chunks: list[str] = []
    start = 0
    # Track an offset instead of re-slicing the remainder on every pass.
    while total - start > max_bytes:
        cut = start + max_bytes
        # Walk backward to avoid breaking a multi-byte codepoint:
        # UTF-8 continuation bytes have the bit pattern 0b10xxxxxx.
        while cut > start and (encoded[cut] & 0xC0) == 0x80:
            cut -= 1
        if cut == start:
            # Degenerate: the codepoint at `start` is wider than max_bytes.
            # Slicing at max_bytes would yield undecodable bytes, so walk
            # forward to the end of that codepoint and emit it whole.
            cut = start + 1
            while cut < total and (encoded[cut] & 0xC0) == 0x80:
                cut += 1
        chunks.append(encoded[start:cut].decode("utf-8"))
        start = cut
    if start < total:
        chunks.append(encoded[start:].decode("utf-8"))
    return chunks
|
||||||
|
|
||||||
|
|
||||||
class _TokenBucket:
|
class _TokenBucket:
|
||||||
"""Token bucket rate limiter for outgoing messages."""
|
"""Token bucket rate limiter for outgoing messages."""
|
||||||
|
|
||||||
@@ -62,6 +83,7 @@ class Bot:
|
|||||||
self._running = False
|
self._running = False
|
||||||
self._started: float = time.monotonic()
|
self._started: float = time.monotonic()
|
||||||
self._tasks: set[asyncio.Task] = set()
|
self._tasks: set[asyncio.Task] = set()
|
||||||
|
self._reconnect_delay: float = 5.0
|
||||||
self._admins: list[str] = config.get("bot", {}).get("admins", [])
|
self._admins: list[str] = config.get("bot", {}).get("admins", [])
|
||||||
self._opers: set[str] = set() # hostmasks of known IRC operators
|
self._opers: set[str] = set() # hostmasks of known IRC operators
|
||||||
self._caps: set[str] = set() # negotiated IRCv3 caps
|
self._caps: set[str] = set() # negotiated IRCv3 caps
|
||||||
@@ -74,16 +96,24 @@ class Bot:
|
|||||||
)
|
)
|
||||||
|
|
||||||
async def start(self) -> None:
|
async def start(self) -> None:
|
||||||
"""Connect, register, join channels, and enter the main loop."""
|
"""Connect, register, join channels, and enter the main loop.
|
||||||
|
|
||||||
|
Uses exponential backoff with jitter on reconnect (5s -> 300s cap).
|
||||||
|
Delay resets to 5s only when _connect_and_run() returns without raising.
|
||||||
|
"""
|
||||||
self._running = True
|
self._running = True
|
||||||
while self._running:
|
while self._running:
|
||||||
try:
|
try:
|
||||||
await self._connect_and_run()
|
await self._connect_and_run()
|
||||||
|
self._reconnect_delay = 5.0 # reset on clean run
|
||||||
except (OSError, ConnectionError) as exc:
|
except (OSError, ConnectionError) as exc:
|
||||||
log.error("connection lost: %s", exc)
|
log.error("connection lost: %s", exc)
|
||||||
if self._running:
|
if self._running:
|
||||||
log.info("reconnecting in %ds...", RECONNECT_DELAY)
|
jitter = self._reconnect_delay * 0.25 * (2 * random.random() - 1)
|
||||||
await asyncio.sleep(RECONNECT_DELAY)
|
delay = self._reconnect_delay + jitter
|
||||||
|
log.info("reconnecting in %.0fs...", delay)
|
||||||
|
await asyncio.sleep(delay)
|
||||||
|
self._reconnect_delay = min(self._reconnect_delay * 2, 300.0)
|
||||||
|
|
||||||
async def _connect_and_run(self) -> None:
|
async def _connect_and_run(self) -> None:
|
||||||
"""Single connection lifecycle."""
|
"""Single connection lifecycle."""
|
||||||
@@ -358,10 +388,17 @@ class Bot:
|
|||||||
# -- Public API for plugins --
|
# -- Public API for plugins --
|
||||||
|
|
||||||
async def send(self, target: str, text: str) -> None:
    """Send a PRIVMSG to a target (channel or nick), rate-limited.

    The text is broken into lines, and each line is split at UTF-8 safe
    boundaries to stay within the IRC 512-byte line limit (RFC 2812).
    One rate-limit token is acquired per PRIVMSG actually sent.

    Args:
        target: Channel or nick to send to.
        text: Message body; may contain several lines.

    Raises:
        ValueError: If ``target`` is so long that no message text would
            fit in a single IRC line.
    """
    # Budget for the text = line limit minus "PRIVMSG <target> :" and CRLF.
    # NOTE(review): assumes format_msg emits exactly that framing, and does
    # not reserve room for the ":nick!user@host " prefix a server prepends
    # when relaying -- confirm whether that needs to be subtracted too.
    overhead = len(f"PRIVMSG {target} :".encode("utf-8")) + 2  # +CRLF
    max_text = _MAX_IRC_LINE - overhead
    if max_text <= 0:
        # A non-positive budget cannot make progress when splitting.
        raise ValueError(
            f"target too long for a {_MAX_IRC_LINE}-byte IRC line: {target!r}"
        )

    # Treat CRLF and bare CR as line breaks as well as LF. CR-LF terminates
    # an IRC message, so a stray "\r" must never end up inside a PRIVMSG.
    lines = text.replace("\r\n", "\n").replace("\r", "\n").split("\n")
    for line in lines:
        for chunk in _split_utf8(line, max_text):
            await self._bucket.acquire()
            await self.conn.send(format_msg("PRIVMSG", target, chunk))
|
||||||
|
|
||||||
async def reply(self, msg: Message, text: str) -> None:
|
async def reply(self, msg: Message, text: str) -> None:
|
||||||
"""Reply to the source of a message (channel or PM)."""
|
"""Reply to the source of a message (channel or PM)."""
|
||||||
|
|||||||
@@ -9,6 +9,8 @@ from dataclasses import dataclass
|
|||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_MAX_IRC_LINE = 512 # bytes including \r\n (RFC 2812)
|
||||||
|
|
||||||
|
|
||||||
def _unescape_tag_value(value: str) -> str:
|
def _unescape_tag_value(value: str) -> str:
|
||||||
"""Unescape an IRCv3 message tag value per the spec."""
|
"""Unescape an IRCv3 message tag value per the spec."""
|
||||||
|
|||||||
Reference in New Issue
Block a user