fix: account for server prefix in IRC line splitting

The 512-byte IRC limit includes the :nick!user@host prefix the server
prepends when relaying. Reserve 64 bytes for it and prefer splitting at
space boundaries instead of mid-word. Also strip the command prefix and
"Commands:" label from help output to keep the listing compact.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
user
2026-02-16 22:02:52 +01:00
parent eb37fef730
commit e8d803abe6
2 changed files with 14 additions and 3 deletions

View File

@@ -52,7 +52,7 @@ async def cmd_help(bot, message):
k for k, v in bot.registry.commands.items() k for k, v in bot.registry.commands.items()
if bot._plugin_allowed(v.plugin, channel) if bot._plugin_allowed(v.plugin, channel)
) )
await bot.reply(message, f"Commands: {', '.join(bot.prefix + n for n in names)}") await bot.reply(message, ", ".join(names))
@command("version", help="Show bot version") @command("version", help="Show bot version")

View File

@@ -22,7 +22,10 @@ _AMBIGUOUS = object() # sentinel for ambiguous prefix matches
def _split_utf8(text: str, max_bytes: int) -> list[str]: def _split_utf8(text: str, max_bytes: int) -> list[str]:
"""Split text into chunks that fit within max_bytes when UTF-8 encoded.""" """Split text into chunks that fit within max_bytes when UTF-8 encoded.
Prefers splitting at space boundaries to avoid mid-word breaks.
"""
encoded = text.encode("utf-8") encoded = text.encode("utf-8")
if len(encoded) <= max_bytes: if len(encoded) <= max_bytes:
return [text] return [text]
@@ -37,6 +40,11 @@ def _split_utf8(text: str, max_bytes: int) -> list[str]:
cut -= 1 cut -= 1
if cut == 0: if cut == 0:
cut = max_bytes # degenerate: force split cut = max_bytes # degenerate: force split
else:
# Try to find a space to split at (searching back from the cut, but
# never into the first 25% of the chunk, so chunks do not get too short)
space = encoded.rfind(b" ", cut // 4, cut)
if space > 0:
cut = space + 1 # keep the space in the first chunk
chunks.append(encoded[:cut].decode("utf-8")) chunks.append(encoded[:cut].decode("utf-8"))
encoded = encoded[cut:] encoded = encoded[cut:]
return chunks return chunks
@@ -436,7 +444,10 @@ class Bot:
Long lines are split at UTF-8 safe boundaries to stay within Long lines are split at UTF-8 safe boundaries to stay within
the IRC 512-byte line limit (RFC 2812). the IRC 512-byte line limit (RFC 2812).
""" """
overhead = len(f"PRIVMSG {target} :".encode("utf-8")) + 2 # +CRLF # Server prepends ":nick!user@host " when relaying; account for it.
# Use 64 bytes as a rough estimate for the source prefix.
# NOTE(review): not a hard upper bound -- a long hostmask can exceed it; confirm.
prefix_overhead = 64
overhead = prefix_overhead + len(f"PRIVMSG {target} :".encode("utf-8")) + 2
max_text = _MAX_IRC_LINE - overhead max_text = _MAX_IRC_LINE - overhead
for line in text.split("\n"): for line in text.split("\n"):
for chunk in _split_utf8(line, max_text): for chunk in _split_utf8(line, max_text):