feat: add OpenRouter LLM chat plugin (!ask, !chat)
Single-shot (!ask) and conversational (!chat) LLM commands backed by OpenRouter's API. Per-user history (20-message cap), a 5-second per-user cooldown, a fallback for reasoning models that return empty content, and model switching via subcommands.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
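Example config (a sketch; the file format is assumed here, but the plugin only reads
an "openrouter" mapping with these keys, or OPENROUTER_API_KEY from the environment):

    [openrouter]
    api_key = "sk-or-..."        # or export OPENROUTER_API_KEY instead
    model = "openrouter/auto"    # optional; falls back to the built-in default
    system_prompt = "..."        # optional; overrides the default prompt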
plugins/llm.py (new file, 298 additions)
@@ -0,0 +1,298 @@
"""Plugin: LLM chat via OpenRouter."""

from __future__ import annotations

import asyncio
import json
import logging
import os
import time
import urllib.error  # HTTPError, caught by the command handlers below
import urllib.request

from derp.http import urlopen as _urlopen
from derp.plugin import command

_log = logging.getLogger(__name__)

# -- Constants ---------------------------------------------------------------

_API_URL = "https://openrouter.ai/api/v1/chat/completions"
_DEFAULT_MODEL = "openrouter/auto"
_TIMEOUT = 30
_MAX_HISTORY = 20
_MAX_REPLY_LEN = 400
_COOLDOWN = 5

_DEFAULT_SYSTEM = (
    "You are a helpful IRC bot assistant. Keep responses concise and under 200 words."
)


# -- Per-bot runtime state ---------------------------------------------------

def _ps(bot):
    """Per-bot plugin runtime state."""
    return bot._pstate.setdefault("llm", {
        "histories": {},  # {nick: [{"role": ..., "content": ...}, ...]}
        "cooldowns": {},  # {nick: monotonic_ts}
        "model": "",  # override per-bot; empty = use default
    })
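
# (bot._pstate is assumed to be a per-bot dict the plugin framework provides
# for plugins to stash runtime state in, keyed by plugin name.)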


# -- Helpers -----------------------------------------------------------------

def _get_api_key(bot) -> str:
    """Resolve API key from env or config."""
    return (
        os.environ.get("OPENROUTER_API_KEY", "")
        or bot.config.get("openrouter", {}).get("api_key", "")
    )


def _get_model(bot) -> str:
    """Resolve current model."""
    ps = _ps(bot)
    return (
        ps["model"]
        or bot.config.get("openrouter", {}).get("model", "")
        or _DEFAULT_MODEL
    )


def _get_system_prompt(bot) -> str:
    """Resolve system prompt from config or default."""
    return bot.config.get("openrouter", {}).get("system_prompt", _DEFAULT_SYSTEM)


def _truncate(text: str, max_len: int = _MAX_REPLY_LEN) -> str:
    """Truncate text with ellipsis if needed."""
    if len(text) <= max_len:
        return text
    return text[: max_len - 3].rstrip() + "..."
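
# e.g. _truncate("a" * 500) -> "a" * 397 + "..." (400 chars total); the rstrip()
# can make the result slightly shorter if the cut lands on whitespace.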


def _check_cooldown(bot, nick: str) -> bool:
    """Return True if the user is within cooldown period."""
    ps = _ps(bot)
    last = ps["cooldowns"].get(nick, 0)
    return (time.monotonic() - last) < _COOLDOWN


def _set_cooldown(bot, nick: str) -> None:
    """Record a cooldown timestamp for a user."""
    _ps(bot)["cooldowns"][nick] = time.monotonic()


# -- Blocking HTTP call ------------------------------------------------------

def _chat_request(api_key: str, model: str, messages: list[dict]) -> dict:
    """Blocking OpenRouter chat completion. Run via executor.

    Returns the parsed JSON response dict.
    Raises on HTTP or connection errors.
    """
    payload = json.dumps({
        "model": model,
        "messages": messages,
    }).encode()

    req = urllib.request.Request(_API_URL, data=payload, method="POST")
    req.add_header("Authorization", f"Bearer {api_key}")
    req.add_header("Content-Type", "application/json")

    resp = _urlopen(req, timeout=_TIMEOUT)
    try:
        raw = resp.read()
    finally:
        # Close even if read() raises, so the connection isn't leaked
        resp.close()

    return json.loads(raw)
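
# Illustrative request body this builds (a sketch, not the full OpenRouter schema):
#   {"model": "openrouter/auto",
#    "messages": [{"role": "system", "content": "You are ..."},
#                 {"role": "user", "content": "hello"}]}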


def _extract_reply(data: dict) -> str:
    """Extract reply text from OpenRouter response.

    Handles reasoning models that return content="" with a reasoning field.
    """
    choices = data.get("choices", [])
    if not choices:
        return ""

    msg = choices[0].get("message", {})
    content = (msg.get("content") or "").strip()
    if content:
        return content

    # Fallback for reasoning models
    reasoning = (msg.get("reasoning") or "").strip()
    return reasoning
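
# Illustrative response shapes (sketches; only the fields read above):
#   normal model:    {"choices": [{"message": {"content": "Hello!"}}]}
#   reasoning model: {"choices": [{"message": {"content": "", "reasoning": "..."}}]}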


# -- Command handlers --------------------------------------------------------

@command("ask", help="Ask: !ask <question>")
async def cmd_ask(bot, message):
    """Single-shot LLM question (no history).

    Usage: !ask <question>
    """
    parts = message.text.split(None, 1)
    if len(parts) < 2 or not parts[1].strip():
        await bot.reply(message, "Usage: !ask <question>")
        return

    api_key = _get_api_key(bot)
    if not api_key:
        await bot.reply(message, "OpenRouter API key not configured")
        return

    nick = message.nick
    if _check_cooldown(bot, nick):
        await bot.reply(message, "Cooldown -- wait a few seconds")
        return

    prompt = parts[1].strip()
    model = _get_model(bot)
    system = _get_system_prompt(bot)
    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": prompt},
    ]

    _set_cooldown(bot, nick)

    loop = asyncio.get_running_loop()
    try:
        data = await loop.run_in_executor(
            None, _chat_request, api_key, model, messages,
        )
    except urllib.error.HTTPError as exc:
        if exc.code == 429:
            await bot.reply(message, "Rate limited by OpenRouter -- try again later")
        else:
            await bot.reply(message, f"API error: HTTP {exc.code}")
        return
    except Exception as exc:
        _log.warning("LLM request failed: %s", exc)
        await bot.reply(message, f"Request failed: {exc}")
        return

    reply = _extract_reply(data)
    if not reply:
        await bot.reply(message, "No response from model")
        return

    lines = _truncate(reply).split("\n")
    await bot.long_reply(message, lines, label="llm")
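
# Example (illustrative transcript; actual reply formatting depends on bot.reply):
#   <alice> !ask what port does IRC use by default?
#   <bot>   6667 for plaintext, 6697 for TLS.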


@command("chat", help="Chat: !chat <msg> | clear | model [name] | models")
async def cmd_chat(bot, message):
    """Conversational LLM chat with per-user history.

    Usage:
        !chat <message>     Send a message (maintains history)
        !chat clear         Clear your conversation history
        !chat model         Show current model
        !chat model <name>  Switch model
        !chat models        List popular free models
    """
    parts = message.text.split(None, 2)
    if len(parts) < 2 or not parts[1].strip():
        await bot.reply(message, "Usage: !chat <message> | clear | model [name] | models")
        return

    sub = parts[1].strip().lower()

    # -- Subcommands ---------------------------------------------------------

    if sub == "clear":
        ps = _ps(bot)
        nick = message.nick
        if nick in ps["histories"]:
            del ps["histories"][nick]
        await bot.reply(message, "Conversation cleared")
        return

    if sub == "model":
        if len(parts) > 2 and parts[2].strip():
            new_model = parts[2].strip()
            _ps(bot)["model"] = new_model
            await bot.reply(message, f"Model set to: {new_model}")
        else:
            await bot.reply(message, f"Current model: {_get_model(bot)}")
        return

    if sub == "models":
        models = [
            "openrouter/auto -- auto-route to best available",
            "google/gemma-3-27b-it:free",
            "meta-llama/llama-3.3-70b-instruct:free",
            "deepseek/deepseek-r1:free",
            "qwen/qwen3-235b-a22b:free",
            "mistralai/mistral-small-3.1-24b-instruct:free",
        ]
        await bot.long_reply(message, models, label="models")
        return

    # -- Chat path -----------------------------------------------------------

    api_key = _get_api_key(bot)
    if not api_key:
        await bot.reply(message, "OpenRouter API key not configured")
        return

    nick = message.nick
    if _check_cooldown(bot, nick):
        await bot.reply(message, "Cooldown -- wait a few seconds")
        return

    # Not a subcommand -- recover the full user text after the command name
    # (the earlier split capped at 2 fields and would have chopped it)
    user_text = message.text.split(None, 1)[1].strip()

    ps = _ps(bot)
    history = ps["histories"].setdefault(nick, [])

    # Build messages
    system = _get_system_prompt(bot)
    history.append({"role": "user", "content": user_text})

    # Cap history
    if len(history) > _MAX_HISTORY:
        history[:] = history[-_MAX_HISTORY:]

    messages = [{"role": "system", "content": system}] + history

    model = _get_model(bot)
    _set_cooldown(bot, nick)

    loop = asyncio.get_running_loop()
    try:
        data = await loop.run_in_executor(
            None, _chat_request, api_key, model, messages,
        )
    except urllib.error.HTTPError as exc:
        # Remove the failed user message from history
        history.pop()
        if exc.code == 429:
            await bot.reply(message, "Rate limited by OpenRouter -- try again later")
        else:
            await bot.reply(message, f"API error: HTTP {exc.code}")
        return
    except Exception as exc:
        history.pop()
        _log.warning("LLM request failed: %s", exc)
        await bot.reply(message, f"Request failed: {exc}")
        return

    reply = _extract_reply(data)
    if not reply:
        history.pop()
        await bot.reply(message, "No response from model")
        return

    # Store assistant reply in history
    history.append({"role": "assistant", "content": reply})
    if len(history) > _MAX_HISTORY:
        history[:] = history[-_MAX_HISTORY:]

    lines = _truncate(reply).split("\n")
    await bot.long_reply(message, lines, label="llm")
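
# Example session (illustrative; reply prefixes depend on bot.reply/long_reply):
#   <alice> !chat remember the number 42
#   <bot>   Got it -- 42.
#   <alice> !chat what number did I ask you to remember?
#   <bot>   42.
#   <alice> !chat clear
#   <bot>   Conversation cleared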