From 95981275b59a30383729f931e6f03243f2e6ad8f Mon Sep 17 00:00:00 2001
From: user <user@rpios.local>
Date: Sun, 22 Feb 2026 05:39:11 +0100
Subject: [PATCH] feat: add OpenRouter LLM chat plugin (!ask, !chat)

Single-shot (!ask) and conversational (!chat) LLM commands backed by
OpenRouter's API. Per-user history (20 msg cap), 5s cooldown, reasoning
model fallback, and model switching via subcommands.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 docker-compose.yml |   2 +
 docs/USAGE.md      |  51 +++++
 plugins/llm.py     | 298 +++++++++++++++++++++++++
 tests/test_llm.py  | 538 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 889 insertions(+)
 create mode 100644 plugins/llm.py
 create mode 100644 tests/test_llm.py
diff --git a/docker-compose.yml b/docker-compose.yml
index 6d0620f..7704c01 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -16,4 +16,6 @@ services:
       - ./config/derp.toml:/app/config/derp.toml:ro,Z
       - ./data:/app/data:Z
       - ./secrets:/app/secrets:ro,Z
+    environment:
+      - OPENROUTER_API_KEY
     command: ["--verbose", "--cprofile"]
diff --git a/docs/USAGE.md b/docs/USAGE.md
index fa09ac9..386de98 100644
--- a/docs/USAGE.md
+++ b/docs/USAGE.md
@@ -187,6 +187,8 @@ unchanged. The server name is derived from the hostname automatically.
 | `!username list` | Show available services by category |
 | `!alert <add\|del\|list\|check\|info\|history>` | Keyword alert subscriptions across platforms |
 | `!searx <query>` | Search SearXNG and show top results |
+| `!ask <question>` | Single-shot LLM question via OpenRouter |
+| `!chat <msg\|clear\|model\|models>` | Conversational LLM chat with history |
 | `!jwt <token>` | Decode JWT header, claims, and flag issues |
 | `!mac <address\|random\|update>` | MAC OUI vendor lookup / random MAC |
 | `!abuse <ip> [ip2 ...]` | AbuseIPDB reputation check |
@@ -815,6 +817,55 @@ Title Two -- https://example.com/page2
 Title Three -- https://example.com/page3
 ```
 
+### `!ask` / `!chat` -- LLM Chat (OpenRouter)
+
+Chat with large language models via [OpenRouter](https://openrouter.ai/)'s
+API. `!ask` is stateless (single question), `!chat` maintains per-user
+conversation history.
+
+```
+!ask <question>              Single-shot question (no history)
+!chat <message>              Chat with conversation history
+!chat clear                  Clear your history
+!chat model                  Show current model
+!chat model <name>           Switch model (bot-wide, affects all users)
+!chat models                 List suggested free models
+```
+
+Output format:
+
+```
+<alice> !ask what is DNS
+<derp> DNS (Domain Name System) translates domain names to IP addresses...
+
+<alice> !chat explain TCP
+<derp> TCP is a connection-oriented transport protocol...
+<alice> !chat how does the handshake work
+<derp> The TCP three-way handshake: SYN, SYN-ACK, ACK...
+```
+
+- Open to all users, works in channels and PMs
+- Per-user cooldown: 5 seconds between requests
+- Conversation history capped at 20 messages per user (ephemeral, not
+  persisted across restarts)
+- Responses truncated to 400 characters; multi-line replies use paste overflow
+- Default model: `openrouter/auto` (OpenRouter picks the model for each request)
+- Reasoning models (DeepSeek R1) are handled transparently -- falls back to
+  the `reasoning` field when `content` is empty
+- Rate limit errors (HTTP 429) produce a clear user-facing message
+
+Configuration:
+
+```toml
+[openrouter]
+api_key = ""                  # or set OPENROUTER_API_KEY env var
+model = "openrouter/auto"     # default model
+system_prompt = "You are a helpful IRC bot assistant. Keep responses concise and under 200 words."
+```
+
+API key: set `OPENROUTER_API_KEY` env var (preferred) or `api_key` under
+`[openrouter]` in config. The env var takes precedence.
+
 ### `!alert` -- Keyword Alert Subscriptions
 
 Search keywords across 27 platforms and announce new results. Unlike
diff --git a/plugins/llm.py b/plugins/llm.py
new file mode 100644
index 0000000..88ad2ee
--- /dev/null
+++ b/plugins/llm.py
@@ -0,0 +1,298 @@
+"""Plugin: LLM chat via OpenRouter."""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+import os
+import time
+import urllib.request
+
+from derp.http import urlopen as _urlopen
+from derp.plugin import command
+
+_log = logging.getLogger(__name__)
+
+# -- Constants ---------------------------------------------------------------
+
+_API_URL = "https://openrouter.ai/api/v1/chat/completions"
+_DEFAULT_MODEL = "openrouter/auto"
+_TIMEOUT = 30
+_MAX_HISTORY = 20
+_MAX_REPLY_LEN = 400
+_COOLDOWN = 5
+
+_DEFAULT_SYSTEM = (
+    "You are a helpful IRC bot assistant. Keep responses concise and under 200 words."
+)
+
+
+# -- Per-bot runtime state ---------------------------------------------------
+
+def _ps(bot):
+    """Return this bot's LLM plugin state dict, creating it on first access."""
+    return bot._pstate.setdefault("llm", {
+        "histories": {},   # {nick: [{"role": ..., "content": ...}, ...]}
+        "cooldowns": {},   # {nick: time.monotonic() of the last request}
+        "model": "",       # per-bot override ("!chat model"); empty = config/default
+    })
+
+
+# -- Helpers -----------------------------------------------------------------
+
+def _get_api_key(bot) -> str:
+    """Return the OpenRouter API key; the env var wins over the config file."""
+    env_key = os.environ.get("OPENROUTER_API_KEY", "")
+    if env_key:
+        return env_key
+    return bot.config.get("openrouter", {}).get("api_key", "")
+
+
+def _get_model(bot) -> str:
+    """Return the active model: runtime override, then config, then default."""
+    override = _ps(bot)["model"]
+    if override:
+        return override
+    configured = bot.config.get("openrouter", {}).get("model", "")
+    # An empty or missing config value falls through to the built-in default.
+    return configured or _DEFAULT_MODEL
+
+
+def _get_system_prompt(bot) -> str:
+    """Return the configured system prompt; only a missing key uses the default."""
+    return bot.config.get("openrouter", {}).get("system_prompt", _DEFAULT_SYSTEM)
+
+
+def _truncate(text: str, max_len: int = _MAX_REPLY_LEN) -> str:
+    """Cap text at max_len chars, ending in "..." when max_len leaves room for it."""
+    if len(text) <= max_len:
+        return text
+    return (text[: max_len - 3].rstrip() + "...") if max_len >= 3 else text[: max(max_len, 0)]
+
+
+def _check_cooldown(bot, nick: str) -> bool:
+    """Return True if the user is within cooldown period (never for a first request)."""
+    last = _ps(bot)["cooldowns"].get(nick)
+    # time.monotonic() has an undefined epoch, so a missing entry must not default to 0.
+    return last is not None and (time.monotonic() - last) < _COOLDOWN
+
+
+def _set_cooldown(bot, nick: str) -> None:
+    """Start the user's cooldown window from the current monotonic time."""
+    _ps(bot)["cooldowns"][nick] = time.monotonic()
+
+
+# -- Blocking HTTP call ------------------------------------------------------
+
+def _chat_request(api_key: str, model: str, messages: list[dict]) -> dict:
+    """Blocking OpenRouter chat completion. Run via executor.
+
+    Returns the parsed JSON response dict.
+    Raises on HTTP or connection errors, or if the body is not valid JSON.
+    """
+    payload = json.dumps({"model": model, "messages": messages}).encode()
+
+    req = urllib.request.Request(_API_URL, data=payload, method="POST")
+    req.add_header("Authorization", f"Bearer {api_key}")
+    req.add_header("Content-Type", "application/json")
+
+    resp = _urlopen(req, timeout=_TIMEOUT)
+    try:
+        # read() can fail mid-body (timeout, reset); still release the connection.
+        raw = resp.read()
+    finally:
+        resp.close()
+
+    return json.loads(raw)
+
+
+def _extract_reply(data: dict) -> str:
+    """Pull the assistant's text out of an OpenRouter completion response.
+
+    Only the first choice is inspected. Its ``content`` is preferred; when that
+    is missing or blank (as reasoning models may return), the ``reasoning``
+    field is used instead. Returns "" when neither yields any text.
+    """
+    choices = data.get("choices", [])
+    msg = choices[0].get("message", {}) if choices else {}
+
+    # Fields in priority order; the first one with non-blank text wins.
+    for field in ("content", "reasoning"):
+        text = (msg.get(field) or "").strip()
+        if text:
+            return text
+
+    return ""
+
+
+# -- Command handlers --------------------------------------------------------
+
+@command("ask", help="Ask: !ask <question>")
+async def cmd_ask(bot, message):
+    """Single-shot LLM question: no chat history is read or stored.
+
+    Usage: !ask <question>
+    """
+    parts = message.text.split(None, 1)
+    if len(parts) < 2 or not parts[1].strip():
+        await bot.reply(message, "Usage: !ask <question>")
+        return
+
+    api_key = _get_api_key(bot)
+    if not api_key:
+        await bot.reply(message, "OpenRouter API key not configured")
+        return
+
+    nick = message.nick
+    if _check_cooldown(bot, nick):
+        await bot.reply(message, "Cooldown -- wait a few seconds")
+        return
+
+    prompt = parts[1].strip()
+    model = _get_model(bot)
+    system = _get_system_prompt(bot)
+    messages = [
+        {"role": "system", "content": system},
+        {"role": "user", "content": prompt},
+    ]
+    # Start the cooldown before the request so failed calls are throttled too.
+    _set_cooldown(bot, nick)
+
+    loop = asyncio.get_running_loop()
+    try:
+        data = await loop.run_in_executor(
+            None, _chat_request, api_key, model, messages,
+        )
+    except urllib.error.HTTPError as exc:
+        if exc.code == 429:
+            await bot.reply(message, "Rate limited by OpenRouter -- try again later")
+        else:
+            await bot.reply(message, f"API error: HTTP {exc.code}")
+        return
+    except Exception as exc:
+        _log.warning("LLM request failed: %s", exc)
+        await bot.reply(message, f"Request failed: {exc}")
+        return
+
+    reply = _extract_reply(data)
+    if not reply:
+        await bot.reply(message, "No response from model")
+        return
+    # Truncate before splitting so the whole reply stays within the length cap.
+    lines = _truncate(reply).split("\n")
+    await bot.long_reply(message, lines, label="llm")
+
+
+@command("chat", help="Chat: !chat <msg> | clear | model [name] | models")
+async def cmd_chat(bot, message):
+    """Conversational LLM chat with per-user history.
+
+    Usage:
+        !chat <message>      Send a message (maintains history)
+        !chat clear           Clear your conversation history
+        !chat model           Show current model
+        !chat model <name>    Switch model
+        !chat models          List popular free models
+    """
+    parts = message.text.split(None, 2)
+    if len(parts) < 2 or not parts[1].strip():
+        await bot.reply(message, "Usage: !chat <message> | clear | model [name] | models")
+        return
+
+    sub = parts[1].strip().lower()
+
+    # -- Subcommands ---------------------------------------------------------
+
+    if sub == "clear":
+        ps = _ps(bot)
+        nick = message.nick
+        if nick in ps["histories"]:
+            del ps["histories"][nick]
+        await bot.reply(message, "Conversation cleared")
+        return
+
+    if sub == "model":
+        if len(parts) > 2 and parts[2].strip():
+            new_model = parts[2].strip()
+            _ps(bot)["model"] = new_model
+            await bot.reply(message, f"Model set to: {new_model}")
+        else:
+            await bot.reply(message, f"Current model: {_get_model(bot)}")
+        return
+
+    if sub == "models":
+        models = [
+            "openrouter/auto -- auto-route to best available",
+            "google/gemma-3-27b-it:free",
+            "meta-llama/llama-3.3-70b-instruct:free",
+            "deepseek/deepseek-r1:free",
+            "qwen/qwen3-235b-a22b:free",
+            "mistralai/mistral-small-3.1-24b-instruct:free",
+        ]
+        await bot.long_reply(message, models, label="models")
+        return
+
+    # -- Chat path -----------------------------------------------------------
+
+    api_key = _get_api_key(bot)
+    if not api_key:
+        await bot.reply(message, "OpenRouter API key not configured")
+        return
+
+    nick = message.nick
+    if _check_cooldown(bot, nick):
+        await bot.reply(message, "Cooldown -- wait a few seconds")
+        return
+
+    # Reconstruct full user text (sub might be part of the message)
+    user_text = message.text.split(None, 1)[1].strip()
+    user_msg = {"role": "user", "content": user_text}
+
+    # History is only read here and committed after a successful reply.
+    # Requests from one user can overlap (cooldown is shorter than the HTTP
+    # timeout), so rolling back a failure with history.pop() could remove a
+    # newer entry instead of this message.
+    ps = _ps(bot)
+    history = ps["histories"].setdefault(nick, [])
+    pending = (history + [user_msg])[-_MAX_HISTORY:]
+
+    system = _get_system_prompt(bot)
+    messages = [{"role": "system", "content": system}] + pending
+
+    model = _get_model(bot)
+    # Start the cooldown before the request so failed calls are throttled too.
+    _set_cooldown(bot, nick)
+
+    loop = asyncio.get_running_loop()
+    try:
+        data = await loop.run_in_executor(
+            None, _chat_request, api_key, model, messages,
+        )
+    except urllib.error.HTTPError as exc:
+        if exc.code == 429:
+            await bot.reply(message, "Rate limited by OpenRouter -- try again later")
+        else:
+            await bot.reply(message, f"API error: HTTP {exc.code}")
+        return
+    except Exception as exc:
+        # Anything else the request raised (connection failure, bad JSON, ...).
+        _log.warning("LLM request failed: %s", exc)
+        await bot.reply(message, f"Request failed: {exc}")
+        return
+
+    reply = _extract_reply(data)
+    if not reply:
+        await bot.reply(message, "No response from model")
+        return
+
+    # Commit the completed exchange as a pair, then re-apply the cap. The full
+    # (untruncated) reply is what gets stored.
+    history.append(user_msg)
+    history.append({"role": "assistant", "content": reply})
+    if len(history) > _MAX_HISTORY:
+        history[:] = history[-_MAX_HISTORY:]
+
+    # Truncate before splitting so the whole reply stays within the length cap.
+    lines = _truncate(reply).split("\n")
+    await bot.long_reply(message, lines, label="llm")
diff --git a/tests/test_llm.py b/tests/test_llm.py
new file mode 100644
index 0000000..4431e71
--- /dev/null
+++ b/tests/test_llm.py
@@ -0,0 +1,538 @@
+"""Tests for the OpenRouter LLM chat plugin."""
+
+import asyncio
+import importlib.util
+import json
+import sys
+import time
+import urllib.error
+from pathlib import Path
+from unittest.mock import patch
+
+from derp.irc import Message
+
+# plugins/ is not a Python package -- load the module from file path
+_spec = importlib.util.spec_from_file_location(
+    "plugins.llm", Path(__file__).resolve().parent.parent / "plugins" / "llm.py",
+)
+_mod = importlib.util.module_from_spec(_spec)
+sys.modules[_spec.name] = _mod
+_spec.loader.exec_module(_mod)
+
+from plugins.llm import (  # noqa: E402
+    _COOLDOWN,
+    _MAX_HISTORY,
+    _MAX_REPLY_LEN,
+    _chat_request,
+    _check_cooldown,
+    _extract_reply,
+    _get_api_key,
+    _get_model,
+    _ps,
+    _set_cooldown,
+    _truncate,
+    cmd_ask,
+    cmd_chat,
+)
+
+
+# -- Helpers -----------------------------------------------------------------
+
+class _FakeState:
+    """Dict-backed stand-in for bot.state: values are stored per plugin, per key."""
+
+    def __init__(self):
+        self._store: dict[str, dict[str, str]] = {}
+
+    def get(self, plugin: str, key: str, default: str | None = None) -> str | None:
+        return self._store.get(plugin, {}).get(key, default)
+
+    def set(self, plugin: str, key: str, value: str) -> None:
+        self._store.setdefault(plugin, {})[key] = value
+
+    def delete(self, plugin: str, key: str) -> bool:
+        bucket = self._store.get(plugin, {})
+        if key not in bucket:
+            return False
+        del bucket[key]
+        return True
+
+    def keys(self, plugin: str) -> list[str]:
+        return sorted(self._store.get(plugin, {}))
+
+
+class _FakeRegistry:
+    """Minimal registry stand-in exposing only an empty ``_modules`` mapping."""
+
+    def __init__(self) -> None:
+        self._modules: dict = {}
+
+
+class _FakeBot:
+    """Bot double for tests: records every send, action and reply in lists."""
+
+    def __init__(self, *, admin: bool = False, config: dict | None = None):
+        self.config = config if config else {}
+        self._admin = admin
+        self._pstate: dict = {}
+        self.state = _FakeState()
+        self.registry = _FakeRegistry()
+        self.sent: list[tuple[str, str]] = []
+        self.actions: list[tuple[str, str]] = []
+        self.replied: list[str] = []
+
+    async def send(self, target: str, text: str) -> None:
+        self.sent.append((target, text))
+
+    async def action(self, target: str, text: str) -> None:
+        self.actions.append((target, text))
+
+    async def reply(self, message, text: str) -> None:
+        self.replied.append(text)
+
+    async def long_reply(self, message, lines, *, label: str = "") -> None:
+        # Each line is recorded as its own reply; the label is ignored.
+        self.replied.extend(lines)
+
+    def _is_admin(self, message) -> bool:
+        return self._admin
+
+
+def _msg(text: str, nick: str = "alice", target: str = "#test") -> Message:
+    """Build a PRIVMSG from *nick* addressed to *target* (a channel by default)."""
+    return Message(
+        raw="", prefix=f"{nick}!~{nick}@host", nick=nick,
+        command="PRIVMSG", params=[target, text], tags={},
+    )
+
+
+def _pm(text: str, nick: str = "alice") -> Message:
+    """Build a PRIVMSG from *nick* addressed to "botname" rather than a channel."""
+    return Message(
+        raw="", prefix=f"{nick}!~{nick}@host", nick=nick,
+        command="PRIVMSG", params=["botname", text], tags={},
+    )
+
+
+def _api_response(content: str = "Hello!", reasoning: str = "") -> dict:
+    """Build a minimal chat-completion payload with a single assistant choice."""
+    # The "reasoning" key is only included when a non-empty value is given.
+    extra = {"reasoning": reasoning} if reasoning else {}
+    message = {"role": "assistant", "content": content, **extra}
+    return {"choices": [{"message": message}]}
+
+
+class _FakeResp:
+    """Mock HTTP response whose read() returns *data* encoded as JSON bytes."""
+
+    def __init__(self, data: dict) -> None:
+        self._data = json.dumps(data).encode()
+
+    def read(self) -> bytes:
+        return self._data
+
+    def close(self) -> None:
+        pass
+
+
+def _clear(bot=None) -> None:
+    """Wipe the LLM plugin state on *bot*; calling without a bot is a no-op."""
+    if bot is not None:
+        state = _ps(bot)
+        # Clear in place so references already held to these dicts stay valid.
+        for bucket in ("histories", "cooldowns"):
+            state[bucket].clear()
+        state["model"] = ""
+
+
+# ---------------------------------------------------------------------------
+# TestTruncate
+# ---------------------------------------------------------------------------
+
+class TestTruncate:
+    """_truncate keeps short text intact and caps long text with an ellipsis."""
+
+    def test_short_text_unchanged(self):
+        assert _truncate("hello") == "hello"
+
+    def test_exact_length_unchanged(self):
+        at_limit = "a" * _MAX_REPLY_LEN
+        assert _truncate(at_limit) == at_limit
+
+    def test_long_text_truncated(self):
+        shortened = _truncate("a" * 600)
+        assert len(shortened) == _MAX_REPLY_LEN
+        assert shortened.endswith("...")
+
+    def test_custom_max(self):
+        assert _truncate("abcdefghij", 7) == "abcd..."
+
+
+# ---------------------------------------------------------------------------
+# TestExtractReply
+# ---------------------------------------------------------------------------
+
+class TestExtractReply:
+    """_extract_reply: content first, reasoning as fallback, "" when neither."""
+
+    def test_normal_content(self):
+        assert _extract_reply(_api_response(content="Hello world")) == "Hello world"
+
+    def test_empty_content_falls_back_to_reasoning(self):
+        payload = _api_response(content="", reasoning="Thinking about it")
+        assert _extract_reply(payload) == "Thinking about it"
+
+    def test_content_preferred_over_reasoning(self):
+        payload = _api_response(content="Answer", reasoning="Reasoning")
+        assert _extract_reply(payload) == "Answer"
+
+    def test_empty_choices(self):
+        assert _extract_reply({"choices": []}) == ""
+
+    def test_no_choices(self):
+        assert _extract_reply({}) == ""
+
+    def test_whitespace_content_falls_back(self):
+        assert _extract_reply(_api_response(content="  ", reasoning="Fallback")) == "Fallback"
+
+
+# ---------------------------------------------------------------------------
+# TestGetApiKey
+# ---------------------------------------------------------------------------
+
+class TestGetApiKey:
+    """API key resolution: environment variable first, then [openrouter] config."""
+
+    def test_from_env(self):
+        bot = _FakeBot()
+        with patch.dict("os.environ", {"OPENROUTER_API_KEY": "env-key"}):
+            assert _get_api_key(bot) == "env-key"
+
+    def test_from_config(self):
+        bot = _FakeBot(config={"openrouter": {"api_key": "cfg-key"}})
+        # clear=True empties os.environ inside the block, so no env key is visible.
+        with patch.dict("os.environ", {}, clear=True):
+            assert _get_api_key(bot) == "cfg-key"
+
+    def test_env_takes_precedence(self):
+        bot = _FakeBot(config={"openrouter": {"api_key": "cfg-key"}})
+        with patch.dict("os.environ", {"OPENROUTER_API_KEY": "env-key"}):
+            assert _get_api_key(bot) == "env-key"
+
+    def test_missing_returns_empty(self):
+        bot = _FakeBot()
+        # Neither source provides a key: cleared environment and empty config.
+        with patch.dict("os.environ", {}, clear=True):
+            assert _get_api_key(bot) == ""
+
+
+# ---------------------------------------------------------------------------
+# TestGetModel
+# ---------------------------------------------------------------------------
+
+class TestGetModel:
+    """Model resolution order: runtime override, then config, then default."""
+
+    def test_default_model(self):
+        assert _get_model(_FakeBot()) == "openrouter/auto"
+
+    def test_from_config(self):
+        assert _get_model(_FakeBot(config={"openrouter": {"model": "some/model"}})) == "some/model"
+
+    def test_runtime_override(self):
+        bot = _FakeBot(config={"openrouter": {"model": "some/model"}})
+        _ps(bot)["model"] = "override/model"
+        assert _get_model(bot) == "override/model"
+
+
+# ---------------------------------------------------------------------------
+# TestCooldown
+# ---------------------------------------------------------------------------
+
+class TestCooldown:
+    def test_first_request_not_limited(self):
+        bot = _FakeBot()
+        _clear(bot)
+        assert _check_cooldown(bot, "alice") is False
+
+    def test_second_request_within_cooldown(self):
+        bot = _FakeBot()
+        _clear(bot)
+        _set_cooldown(bot, "alice")
+        assert _check_cooldown(bot, "alice") is True
+
+    def test_different_users_independent(self):
+        bot = _FakeBot()
+        _clear(bot)
+        _set_cooldown(bot, "alice")
+        assert _check_cooldown(bot, "bob") is False
+
+    def test_after_cooldown_passes(self):
+        bot = _FakeBot()
+        _clear(bot)
+        _set_cooldown(bot, "alice")
+        # Simulate time passing: backdate the stored timestamp past the cooldown window
+        _ps(bot)["cooldowns"]["alice"] = time.monotonic() - _COOLDOWN - 1
+        assert _check_cooldown(bot, "alice") is False
+
+
+# ---------------------------------------------------------------------------
+# TestCmdAsk
+# ---------------------------------------------------------------------------
+
+class TestCmdAsk:
+    """End-to-end tests for !ask with the HTTP layer (_urlopen) patched out."""
+
+    def test_no_args(self):
+        bot = _FakeBot()
+        asyncio.run(cmd_ask(bot, _msg("!ask")))
+        assert "Usage:" in bot.replied[0]
+
+    def test_empty_args(self):
+        bot = _FakeBot()
+        asyncio.run(cmd_ask(bot, _msg("!ask   ")))
+        assert "Usage:" in bot.replied[0]
+
+    def test_no_api_key(self):
+        bot = _FakeBot()
+        _clear(bot)
+        with patch.dict("os.environ", {}, clear=True):
+            asyncio.run(cmd_ask(bot, _msg("!ask what is python")))
+        assert "not configured" in bot.replied[0]
+
+    def test_success(self):
+        bot = _FakeBot()
+        _clear(bot)
+        resp = _FakeResp(_api_response(content="Python is a programming language."))
+
+        with patch.dict("os.environ", {"OPENROUTER_API_KEY": "test-key"}):
+            with patch.object(_mod, "_urlopen", return_value=resp):
+                asyncio.run(cmd_ask(bot, _msg("!ask what is python")))
+
+        assert len(bot.replied) == 1
+        assert "Python is a programming language" in bot.replied[0]
+
+    def test_api_error_429(self):
+        bot = _FakeBot()
+        _clear(bot)
+        err = urllib.error.HTTPError(
+            "url", 429, "Too Many Requests", {}, None,
+        )
+
+        with patch.dict("os.environ", {"OPENROUTER_API_KEY": "test-key"}):
+            with patch.object(_mod, "_urlopen", side_effect=err):
+                asyncio.run(cmd_ask(bot, _msg("!ask hello")))
+
+        assert "Rate limited" in bot.replied[0]
+
+    def test_api_error_500(self):
+        bot = _FakeBot()
+        _clear(bot)
+        err = urllib.error.HTTPError(
+            "url", 500, "Internal Server Error", {}, None,
+        )
+
+        with patch.dict("os.environ", {"OPENROUTER_API_KEY": "test-key"}):
+            with patch.object(_mod, "_urlopen", side_effect=err):
+                asyncio.run(cmd_ask(bot, _msg("!ask hello")))
+
+        assert "API error" in bot.replied[0]
+        assert "500" in bot.replied[0]
+
+    def test_connection_error(self):
+        bot = _FakeBot()
+        _clear(bot)
+
+        with patch.dict("os.environ", {"OPENROUTER_API_KEY": "test-key"}):
+            with patch.object(_mod, "_urlopen", side_effect=ConnectionError("fail")):
+                asyncio.run(cmd_ask(bot, _msg("!ask hello")))
+
+        assert "Request failed" in bot.replied[0]
+
+    def test_empty_response(self):
+        bot = _FakeBot()
+        _clear(bot)
+        resp = _FakeResp({"choices": []})
+
+        with patch.dict("os.environ", {"OPENROUTER_API_KEY": "test-key"}):
+            with patch.object(_mod, "_urlopen", return_value=resp):
+                asyncio.run(cmd_ask(bot, _msg("!ask hello")))
+
+        assert "No response" in bot.replied[0]
+
+    def test_cooldown(self):
+        bot = _FakeBot()
+        _clear(bot)
+        resp = _FakeResp(_api_response(content="Hello!"))
+
+        with patch.dict("os.environ", {"OPENROUTER_API_KEY": "test-key"}):
+            with patch.object(_mod, "_urlopen", return_value=resp):
+                asyncio.run(cmd_ask(bot, _msg("!ask first")))
+            bot.replied.clear()
+            asyncio.run(cmd_ask(bot, _msg("!ask second")))
+
+        assert "Cooldown" in bot.replied[0]
+
+    def test_response_truncation(self):
+        bot = _FakeBot()
+        _clear(bot)
+        long_text = "a" * 600
+        resp = _FakeResp(_api_response(content=long_text))
+
+        with patch.dict("os.environ", {"OPENROUTER_API_KEY": "test-key"}):
+            with patch.object(_mod, "_urlopen", return_value=resp):
+                asyncio.run(cmd_ask(bot, _msg("!ask hello")))
+
+        assert len(bot.replied[0]) == _MAX_REPLY_LEN
+        assert bot.replied[0].endswith("...")
+
+    def test_reasoning_model_fallback(self):
+        bot = _FakeBot()
+        _clear(bot)
+        resp = _FakeResp(_api_response(content="", reasoning="Deep thought"))
+
+        with patch.dict("os.environ", {"OPENROUTER_API_KEY": "test-key"}):
+            with patch.object(_mod, "_urlopen", return_value=resp):
+                asyncio.run(cmd_ask(bot, _msg("!ask meaning of life")))
+
+        assert "Deep thought" in bot.replied[0]
+
+    def test_multiline_uses_long_reply(self):
+        bot = _FakeBot()
+        _clear(bot)
+        resp = _FakeResp(_api_response(content="Line one\nLine two\nLine three"))
+
+        with patch.dict("os.environ", {"OPENROUTER_API_KEY": "test-key"}):
+            with patch.object(_mod, "_urlopen", return_value=resp):
+                asyncio.run(cmd_ask(bot, _msg("!ask hello")))
+
+        assert len(bot.replied) == 3
+        assert bot.replied[0] == "Line one"
+
+
+# ---------------------------------------------------------------------------
+# TestCmdChat
+# ---------------------------------------------------------------------------
+
+class TestCmdChat:
+    """End-to-end tests for !chat: subcommands, history handling and failures."""
+
+    def test_no_args(self):
+        bot = _FakeBot()
+        asyncio.run(cmd_chat(bot, _msg("!chat")))
+        assert "Usage:" in bot.replied[0]
+
+    def test_chat_with_history(self):
+        bot = _FakeBot()
+        _clear(bot)
+        resp1 = _FakeResp(_api_response(content="I am an assistant."))
+        resp2 = _FakeResp(_api_response(content="You asked who I am."))
+
+        with patch.dict("os.environ", {"OPENROUTER_API_KEY": "test-key"}):
+            with patch.object(_mod, "_urlopen", return_value=resp1):
+                asyncio.run(cmd_chat(bot, _msg("!chat who are you")))
+            # Clear cooldown for second request
+            _ps(bot)["cooldowns"].clear()
+            with patch.object(_mod, "_urlopen", return_value=resp2) as mock_url:
+                asyncio.run(cmd_chat(bot, _msg("!chat what did I ask")))
+                # Verify the history was sent with the second request
+                call_args = mock_url.call_args
+                req = call_args[0][0]
+                body = json.loads(req.data)
+                # System + user1 + assistant1 + user2 = 4 messages
+                assert len(body["messages"]) == 4
+                assert body["messages"][1]["content"] == "who are you"
+                assert body["messages"][2]["content"] == "I am an assistant."
+                assert body["messages"][3]["content"] == "what did I ask"
+
+        assert "I am an assistant" in bot.replied[0]
+        assert "You asked who I am" in bot.replied[1]
+
+    def test_chat_clear(self):
+        bot = _FakeBot()
+        _clear(bot)
+        # Pre-populate history
+        _ps(bot)["histories"]["alice"] = [
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "hi"},
+        ]
+
+        asyncio.run(cmd_chat(bot, _msg("!chat clear")))
+        assert "cleared" in bot.replied[0].lower()
+        assert "alice" not in _ps(bot)["histories"]
+
+    def test_chat_cooldown(self):
+        bot = _FakeBot()
+        _clear(bot)
+        resp = _FakeResp(_api_response(content="Hello!"))
+
+        with patch.dict("os.environ", {"OPENROUTER_API_KEY": "test-key"}):
+            with patch.object(_mod, "_urlopen", return_value=resp):
+                asyncio.run(cmd_chat(bot, _msg("!chat first")))
+            bot.replied.clear()
+            asyncio.run(cmd_chat(bot, _msg("!chat second")))
+
+        assert "Cooldown" in bot.replied[0]
+
+    def test_chat_model_show(self):
+        bot = _FakeBot()
+        _clear(bot)
+        asyncio.run(cmd_chat(bot, _msg("!chat model")))
+        assert "openrouter/auto" in bot.replied[0]
+
+    def test_chat_model_switch(self):
+        bot = _FakeBot(admin=True)
+        _clear(bot)
+        asyncio.run(cmd_chat(bot, _msg("!chat model meta-llama/llama-3.3-70b-instruct:free")))
+        assert "Model set to" in bot.replied[0]
+        assert _ps(bot)["model"] == "meta-llama/llama-3.3-70b-instruct:free"
+
+    def test_chat_models_list(self):
+        bot = _FakeBot()
+        _clear(bot)
+        asyncio.run(cmd_chat(bot, _msg("!chat models")))
+        assert len(bot.replied) >= 3
+        assert any("openrouter/auto" in r for r in bot.replied)
+
+    def test_chat_no_api_key(self):
+        bot = _FakeBot()
+        _clear(bot)
+        with patch.dict("os.environ", {}, clear=True):
+            asyncio.run(cmd_chat(bot, _msg("!chat hello")))
+        assert "not configured" in bot.replied[0]
+
+    def test_history_cap(self):
+        bot = _FakeBot()
+        _clear(bot)
+        # Pre-populate with MAX_HISTORY messages
+        ps = _ps(bot)
+        ps["histories"]["alice"] = [
+            {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg{i}"}
+            for i in range(_MAX_HISTORY)
+        ]
+        resp = _FakeResp(_api_response(content="Latest reply"))
+
+        with patch.dict("os.environ", {"OPENROUTER_API_KEY": "test-key"}):
+            with patch.object(_mod, "_urlopen", return_value=resp):
+                asyncio.run(cmd_chat(bot, _msg("!chat overflow")))
+
+        history = ps["histories"]["alice"]
+        # Exactly at the cap, with the newest reply kept and the oldest dropped
+        assert len(history) == _MAX_HISTORY
+        assert history[-1] == {"role": "assistant", "content": "Latest reply"}
+
+    def test_chat_api_error_removes_user_msg(self):
+        """On API failure, the user message should be removed from history."""
+        bot = _FakeBot()
+        _clear(bot)
+        err = urllib.error.HTTPError(
+            "url", 500, "Internal Server Error", {}, None,
+        )
+
+        with patch.dict("os.environ", {"OPENROUTER_API_KEY": "test-key"}):
+            with patch.object(_mod, "_urlopen", side_effect=err):
+                asyncio.run(cmd_chat(bot, _msg("!chat hello")))
+
+        ps = _ps(bot)
+        # History should be empty -- user msg was removed on failure
+        assert len(ps["histories"].get("alice", [])) == 0