diff --git a/TASKS.md b/TASKS.md index 112985e..112cf83 100644 --- a/TASKS.md +++ b/TASKS.md @@ -1,6 +1,25 @@ # derp - Tasks -## Current Sprint -- v1.2.7 Subscription Plugin Enrichment (2026-02-19) +## Current Sprint -- v1.2.9 LLM Mode (2026-02-19) + +| Pri | Status | Task | +|-----|--------|------| +| P0 | [x] | `--llm` CLI flag: route logging to `info.log`, stdout for addressed messages | +| P0 | [x] | `_is_addressed()` method: DMs + nick-prefixed | +| P1 | [x] | Stdout routing: PRIVMSG in/out, PING, 001, disconnect, reconnect | +| P2 | [x] | Documentation update (USAGE.md CLI flags + LLM mode section) | + +## Previous Sprint -- v1.2.8 ASN Backend Replacement (2026-02-19) + +| Pri | Status | Task | +|-----|--------|------| +| P0 | [x] | Replace MaxMind ASN with iptoasn.com TSV backend (no license key) | +| P0 | [x] | Bisect-based lookup in `plugins/asn.py` (pure stdlib) | +| P1 | [x] | `update_asn()` in `scripts/update-data.sh` (SOCKS5 download) | +| P2 | [x] | Tests: load, lookup, command handler (30 cases, 906 total) | +| P2 | [x] | Documentation update (USAGE.md data directory layout) | + +## Previous Sprint -- v1.2.7 Subscription Plugin Enrichment (2026-02-19) | Pri | Status | Task | |-----|--------|------| diff --git a/docs/USAGE.md b/docs/USAGE.md index 2656029..dda9473 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -16,6 +16,7 @@ derp --config /path/to/derp.toml --verbose |------|-------------| | `-c, --config PATH` | Config file path | | `-v, --verbose` | Debug logging | +| `--llm` | LLM mode: addressed messages to stdout, rest to info.log | | `--cprofile [PATH]` | Enable cProfile, dump to PATH [derp.prof] | | `--tracemalloc [N]` | Enable tracemalloc, capture N frames deep [10] | | `-V, --version` | Print version | @@ -51,6 +52,7 @@ plugins_dir = "plugins" # Plugin directory path rate_limit = 2.0 # Max messages per second (default: 2.0) rate_burst = 5 # Burst capacity (default: 5) paste_threshold = 4 # Max lines before overflow to FlaskPaste (default: 
4) +owner = [] # Owner hostmask patterns (fnmatch), grants admin + LLM access admins = [] # Hostmask patterns (fnmatch), IRCOPs auto-detected timezone = "UTC" # Timezone for calendar reminders (IANA tz name) @@ -216,14 +218,20 @@ Default format is `"text"` (human-readable, same as before). ## Admin System -Commands marked as `admin` require elevated permissions. Admin access is -granted via: +Commands marked as `admin` require elevated permissions. There are two +privilege levels: -1. **IRC operator status** -- detected automatically via `WHO` -2. **Hostmask patterns** -- configured in `[bot] admins`, fnmatch-style +| Level | Source | Grants | +|-------|--------|--------| +| **Owner** | `[bot] owner` hostmask patterns | Admin + LLM mode access | +| **Admin** | `[bot] admins` patterns, IRC operators | Admin commands only | + +Owner is a superset of admin -- owners automatically have admin privileges. +Only owners can interact with the bot via `--llm` mode. ```toml [bot] +owner = ["me!~user@my.host"] admins = [ "*!~user@trusted.host", "ops!*@*.ops.net", @@ -368,7 +376,7 @@ The script is cron-friendly (exit 0/1, quiet unless `NO_COLOR` is unset). ``` data/ GeoLite2-City.mmdb # MaxMind GeoIP (requires license key) - GeoLite2-ASN.mmdb # MaxMind ASN (requires license key) + ip2asn-v4.tsv # iptoasn.com ASN database (no key required) tor-exit-nodes.txt # Tor exit node IPs iprep/ # Firehol/ET blocklist feeds firehol_level1.netset @@ -380,7 +388,8 @@ data/ ... ``` -GeoLite2 databases require a free MaxMind license key. Set +The ASN database is downloaded from iptoasn.com (no account required). +GeoLite2-City requires a free MaxMind license key -- set `MAXMIND_LICENSE_KEY` when running the update script. ## Plugin Management @@ -488,6 +497,39 @@ On connection loss, the bot reconnects with exponential backoff and jitter: - Jitter: +/- 25% to avoid thundering herd - Resets to 5s after a successful connection +## LLM Mode + +Run with `--llm` to split output for LLM consumption. 
All internal logging +(connection status, plugin loads, unaddressed chatter) goes to `info.log`. +Stdout receives only messages addressed to the bot and the bot's own replies. + +```bash +derp --llm +derp --llm --config config/derp.toml +``` + +### What goes to stdout + +- **DMs**: private messages from an owner +- **Nick-prefixed**: channel messages from an owner starting with `<nick>:` or `<nick>,` +- **Bot replies**: all messages sent by the bot (PRIVMSG and ACTION) +- **Status lines**: connection, ping, disconnect, reconnect events + +### Output format + +``` +19:09 --- connected as derp +19:09 --- ping +19:09 #test <owner> derp: what is 1.1.1.1? +19:09 #test <derp> 1.1.1.1: AS13335 CLOUDFLARENET (US) +19:09 <owner> hey derp, check this out +19:09 <derp> I'm just a bot +19:09 #test * derp [rss/hackernews] New article -- URL +19:15 --- disconnected: Connection reset +19:15 --- reconnecting in 5s +19:15 --- connected as derp +``` + ### `!dork` -- Google Dork Query Builder Generate Google dork queries for a target domain. Template-based, no HTTP diff --git a/src/derp/bot.py b/src/derp/bot.py index 4d4b175..a57c6b3 100644 --- a/src/derp/bot.py +++ b/src/derp/bot.py @@ -7,6 +7,7 @@ import base64 import fnmatch import logging import random +import sys import time from datetime import datetime, timezone from pathlib import Path @@ -77,9 +78,10 @@ class _TokenBucket: class Bot: """IRC bot: ties connection, config, and plugins together.""" - def __init__(self, config: dict, registry: PluginRegistry) -> None: + def __init__(self, config: dict, registry: PluginRegistry, *, llm: bool = False) -> None: self.config = config self.registry = registry + self._llm = llm self.conn = IRCConnection( host=config["server"]["host"], port=config["server"]["port"], @@ -92,6 +94,7 @@ class Bot: self._started: float = time.monotonic() self._tasks: set[asyncio.Task] = set() self._reconnect_delay: float = 5.0 + self._owner: list[str] = config.get("bot", {}).get("owner", []) self._admins: list[str] = config.get("bot", 
{}).get("admins", []) self._opers: set[str] = set() # hostmasks of known IRC operators self._caps: set[str] = set() # negotiated IRCv3 caps @@ -117,10 +120,18 @@ class Bot: self._reconnect_delay = 5.0 # reset on clean run except (OSError, ConnectionError) as exc: log.error("connection lost: %s", exc) + if self._llm: + ts = time.strftime("%H:%M") + sys.stdout.write(f"{ts} --- disconnected: {exc}\n") + sys.stdout.flush() if self._running: jitter = self._reconnect_delay * 0.25 * (2 * random.random() - 1) delay = self._reconnect_delay + jitter log.info("reconnecting in %.0fs...", delay) + if self._llm: + ts = time.strftime("%H:%M") + sys.stdout.write(f"{ts} --- reconnecting in {delay:.0f}s\n") + sys.stdout.flush() await asyncio.sleep(delay) self._reconnect_delay = min(self._reconnect_delay * 2, 300.0) @@ -259,11 +270,19 @@ class Bot: # Protocol-level PING/PONG if msg.command == "PING": await self.conn.send(format_msg("PONG", msg.params[0] if msg.params else "")) + if self._llm: + ts = time.strftime("%H:%M") + sys.stdout.write(f"{ts} --- ping\n") + sys.stdout.flush() return # RPL_WELCOME (001) — join channels and WHO for oper detection if msg.command == "001": self.nick = msg.params[0] if msg.params else self.nick + if self._llm: + ts = time.strftime("%H:%M") + sys.stdout.write(f"{ts} --- connected as {self.nick}\n") + sys.stdout.flush() for channel in self.config["bot"]["channels"]: await self.join(channel) await self.conn.send(format_msg("WHO", channel)) @@ -300,6 +319,16 @@ class Bot: self._spawn(self._handle_ctcp(msg), name="ctcp") return + # LLM mode: route PRIVMSG to stdout or info.log + if self._llm and msg.command == "PRIVMSG" and msg.text: + ts = time.strftime("%H:%M") + if self._is_addressed(msg) and self._is_owner(msg): + prefix = f"{msg.target} " if msg.is_channel else "" + sys.stdout.write(f"{ts} {prefix}<{msg.nick}> {msg.text}\n") + sys.stdout.flush() + else: + log.info("%s <%s> %s", msg.target, msg.nick, msg.text) + # Dispatch to event handlers 
(fire-and-forget) channel = msg.target if msg.is_channel else None event_type = msg.command @@ -359,14 +388,25 @@ class Bot: return True return plugin_name in allowed + def _is_owner(self, msg: Message) -> bool: + """Check if the sender matches a configured owner hostmask pattern.""" + if not msg.prefix: + return False + for pattern in self._owner: + if fnmatch.fnmatch(msg.prefix, pattern): + return True + return False + def _is_admin(self, msg: Message) -> bool: """Check if the message sender is a bot admin. - Returns True if the sender is a known IRC operator or matches - a configured hostmask pattern (fnmatch-style). + Returns True if the sender is an owner, a known IRC operator, + or matches a configured admin hostmask pattern (fnmatch-style). """ if not msg.prefix: return False + if self._is_owner(msg): + return True if msg.prefix in self._opers: return True for pattern in self._admins: @@ -374,6 +414,18 @@ class Bot: return True return False + def _is_addressed(self, msg: Message) -> bool: + """Check if a message is addressed to the bot (DM or nick-prefixed).""" + if not msg.is_channel: + return True + text = (msg.text or "").lstrip() + nick_lower = self.nick.lower() + if text.lower().startswith(nick_lower): + rest = text[len(nick_lower):] + if rest and rest[0] in ":, ": + return True + return False + def _dispatch_command(self, msg: Message) -> None: """Check if a PRIVMSG is a bot command and spawn it.""" text = msg.text @@ -453,6 +505,16 @@ class Bot: for chunk in _split_utf8(line, max_text): await self._bucket.acquire() await self.conn.send(format_msg("PRIVMSG", target, chunk)) + if self._llm: + ts = time.strftime("%H:%M") + display = chunk + if display.startswith("\x01ACTION ") and display.endswith("\x01"): + display = display[8:-1] + out = f"{ts} {target} * {self.nick} {display}" + else: + out = f"{ts} {target} <{self.nick}> {display}" + sys.stdout.write(out + "\n") + sys.stdout.flush() async def reply(self, msg: Message, text: str) -> None: """Reply to 
the source of a message (channel or PM).""" diff --git a/src/derp/cli.py b/src/derp/cli.py index 4b2dbd6..5bd1209 100644 --- a/src/derp/cli.py +++ b/src/derp/cli.py @@ -33,6 +33,11 @@ def build_parser() -> argparse.ArgumentParser: action="store_true", help="enable debug logging", ) + p.add_argument( + "--llm", + action="store_true", + help="LLM mode: addressed messages to stdout, rest to info.log", + ) p.add_argument( "--cprofile", metavar="PATH", @@ -111,7 +116,14 @@ def main(argv: list[str] | None = None) -> int: level = logging.DEBUG if args.verbose else logging.INFO log_fmt = config.get("logging", {}).get("format", "text") - if log_fmt == "json": + if args.llm: + handler = logging.FileHandler("info.log") + if log_fmt == "json": + handler.setFormatter(JsonFormatter()) + else: + handler.setFormatter(logging.Formatter(LOG_FORMAT, datefmt=LOG_DATE)) + logging.basicConfig(handlers=[handler], level=level) + elif log_fmt == "json": handler = logging.StreamHandler() handler.setFormatter(JsonFormatter()) logging.basicConfig(handlers=[handler], level=level) @@ -122,7 +134,7 @@ def main(argv: list[str] | None = None) -> int: log.info("derp %s starting", __version__) registry = PluginRegistry() - bot = Bot(config, registry) + bot = Bot(config, registry, llm=args.llm) bot.load_plugins() if args.tracemalloc: