From e8d803abe6f1b4bf47187adf6f629cef0b24f24a Mon Sep 17 00:00:00 2001 From: user Date: Mon, 16 Feb 2026 22:02:52 +0100 Subject: [PATCH] fix: account for server prefix in IRC line splitting The 512-byte IRC limit includes the :nick!user@host prefix the server prepends when relaying. Reserve 64 bytes for it and prefer splitting at space boundaries instead of mid-word. Also strip the command prefix and "Commands:" label from help output to keep the listing compact. Co-Authored-By: Claude Opus 4.6 --- plugins/core.py | 2 +- src/derp/bot.py | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/plugins/core.py b/plugins/core.py index 66940ac..318f04a 100644 --- a/plugins/core.py +++ b/plugins/core.py @@ -52,7 +52,7 @@ async def cmd_help(bot, message): k for k, v in bot.registry.commands.items() if bot._plugin_allowed(v.plugin, channel) ) - await bot.reply(message, f"Commands: {', '.join(bot.prefix + n for n in names)}") + await bot.reply(message, ", ".join(names)) @command("version", help="Show bot version") diff --git a/src/derp/bot.py b/src/derp/bot.py index 056d6b9..dc321ce 100644 --- a/src/derp/bot.py +++ b/src/derp/bot.py @@ -22,7 +22,10 @@ _AMBIGUOUS = object() # sentinel for ambiguous prefix matches def _split_utf8(text: str, max_bytes: int) -> list[str]: - """Split text into chunks that fit within max_bytes when UTF-8 encoded.""" + """Split text into chunks that fit within max_bytes when UTF-8 encoded. + + Prefers splitting at space boundaries to avoid mid-word breaks. 
+ """ encoded = text.encode("utf-8") if len(encoded) <= max_bytes: return [text] @@ -37,6 +40,11 @@ def _split_utf8(text: str, max_bytes: int) -> list[str]: cut -= 1 if cut == 0: cut = max_bytes # degenerate: force split + else: + # Try to find the last space to split at (searching past the first 25% of the chunk) + space = encoded.rfind(b" ", cut // 4, cut) + if space > 0: + cut = space + 1 # keep the space in the first chunk chunks.append(encoded[:cut].decode("utf-8")) encoded = encoded[cut:] return chunks @@ -436,7 +444,10 @@ class Bot: Long lines are split at UTF-8 safe boundaries to stay within the IRC 512-byte line limit (RFC 2812). """ - overhead = len(f"PRIVMSG {target} :".encode("utf-8")) + 2 # +CRLF + # Server prepends ":nick!user@host " when relaying; account for it. + # Use 64 bytes as conservative estimate for the source prefix. + prefix_overhead = 64 + overhead = prefix_overhead + len(f"PRIVMSG {target} :".encode("utf-8")) + 2 max_text = _MAX_IRC_LINE - overhead for line in text.split("\n"): for chunk in _split_utf8(line, max_text):