Files
derp/plugins/llm.py
user 95981275b5 feat: add OpenRouter LLM chat plugin (!ask, !chat)
Single-shot (!ask) and conversational (!chat) LLM commands backed by
OpenRouter's API. Per-user history (20 msg cap), 5s cooldown, reasoning
model fallback, and model switching via subcommands.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-22 05:39:11 +01:00

299 lines
8.7 KiB
Python

"""Plugin: LLM chat via OpenRouter."""
from __future__ import annotations
import asyncio
import json
import logging
import os
import time
import urllib.error
import urllib.request
from derp.http import urlopen as _urlopen
from derp.plugin import command
_log = logging.getLogger(__name__)
# -- Constants ---------------------------------------------------------------
_API_URL = "https://openrouter.ai/api/v1/chat/completions"
_DEFAULT_MODEL = "openrouter/auto"
_TIMEOUT = 30
_MAX_HISTORY = 20
_MAX_REPLY_LEN = 400
_COOLDOWN = 5
_DEFAULT_SYSTEM = (
"You are a helpful IRC bot assistant. Keep responses concise and under 200 words."
)
# -- Per-bot runtime state ---------------------------------------------------
def _ps(bot):
"""Per-bot plugin runtime state."""
return bot._pstate.setdefault("llm", {
"histories": {}, # {nick: [{"role": ..., "content": ...}, ...]}
"cooldowns": {}, # {nick: monotonic_ts}
"model": "", # override per-bot; empty = use default
})
# -- Helpers -----------------------------------------------------------------
def _get_api_key(bot) -> str:
"""Resolve API key from env or config."""
return (
os.environ.get("OPENROUTER_API_KEY", "")
or bot.config.get("openrouter", {}).get("api_key", "")
)
def _get_model(bot) -> str:
    """Return the active model: per-bot override, then config, then default."""
    override = _ps(bot)["model"]
    if override:
        return override
    configured = bot.config.get("openrouter", {}).get("model", "")
    if configured:
        return configured
    return _DEFAULT_MODEL
def _get_system_prompt(bot) -> str:
    """Return the configured system prompt, falling back to the default."""
    section = bot.config.get("openrouter", {})
    return section.get("system_prompt", _DEFAULT_SYSTEM)
def _truncate(text: str, max_len: int = _MAX_REPLY_LEN) -> str:
    """Clip *text* to *max_len* characters, appending "..." when clipped."""
    if len(text) > max_len:
        # Reserve 3 chars for the ellipsis; strip trailing space at the cut.
        text = text[: max_len - 3].rstrip() + "..."
    return text
def _check_cooldown(bot, nick: str) -> bool:
    """Return True while *nick* is still inside the cooldown window."""
    elapsed = time.monotonic() - _ps(bot)["cooldowns"].get(nick, 0)
    return elapsed < _COOLDOWN
def _set_cooldown(bot, nick: str) -> None:
    """Stamp *nick* with the current monotonic time for cooldown tracking."""
    state = _ps(bot)
    state["cooldowns"][nick] = time.monotonic()
# -- Blocking HTTP call ------------------------------------------------------
def _chat_request(api_key: str, model: str, messages: list[dict]) -> dict:
    """Blocking OpenRouter chat completion. Run via executor.

    Args:
        api_key: OpenRouter bearer token.
        model: model identifier to request.
        messages: chat messages in OpenAI chat-completion format.

    Returns the parsed JSON response dict.
    Raises on HTTP or connection errors (urllib.error.*) and on a
    malformed JSON body (json.JSONDecodeError).
    """
    payload = json.dumps({
        "model": model,
        "messages": messages,
    }).encode()
    req = urllib.request.Request(_API_URL, data=payload, method="POST")
    req.add_header("Authorization", f"Bearer {api_key}")
    req.add_header("Content-Type", "application/json")
    resp = _urlopen(req, timeout=_TIMEOUT)
    try:
        # try/finally so the connection is released even if read() raises
        # mid-stream (previously the response leaked on a read error).
        raw = resp.read()
    finally:
        resp.close()
    return json.loads(raw)
def _extract_reply(data: dict) -> str:
"""Extract reply text from OpenRouter response.
Handles reasoning models that return content="" with a reasoning field.
"""
choices = data.get("choices", [])
if not choices:
return ""
msg = choices[0].get("message", {})
content = (msg.get("content") or "").strip()
if content:
return content
# Fallback for reasoning models
reasoning = (msg.get("reasoning") or "").strip()
return reasoning
# -- Command handlers --------------------------------------------------------
@command("ask", help="Ask: !ask <question>")
async def cmd_ask(bot, message):
"""Single-shot LLM question (no history).
Usage: !ask <question>
"""
parts = message.text.split(None, 1)
if len(parts) < 2 or not parts[1].strip():
await bot.reply(message, "Usage: !ask <question>")
return
api_key = _get_api_key(bot)
if not api_key:
await bot.reply(message, "OpenRouter API key not configured")
return
nick = message.nick
if _check_cooldown(bot, nick):
await bot.reply(message, "Cooldown -- wait a few seconds")
return
prompt = parts[1].strip()
model = _get_model(bot)
system = _get_system_prompt(bot)
messages = [
{"role": "system", "content": system},
{"role": "user", "content": prompt},
]
_set_cooldown(bot, nick)
loop = asyncio.get_running_loop()
try:
data = await loop.run_in_executor(
None, _chat_request, api_key, model, messages,
)
except urllib.error.HTTPError as exc:
if exc.code == 429:
await bot.reply(message, "Rate limited by OpenRouter -- try again later")
else:
await bot.reply(message, f"API error: HTTP {exc.code}")
return
except Exception as exc:
_log.warning("LLM request failed: %s", exc)
await bot.reply(message, f"Request failed: {exc}")
return
reply = _extract_reply(data)
if not reply:
await bot.reply(message, "No response from model")
return
lines = _truncate(reply).split("\n")
await bot.long_reply(message, lines, label="llm")
@command("chat", help="Chat: !chat <msg> | clear | model [name] | models")
async def cmd_chat(bot, message):
"""Conversational LLM chat with per-user history.
Usage:
!chat <message> Send a message (maintains history)
!chat clear Clear your conversation history
!chat model Show current model
!chat model <name> Switch model
!chat models List popular free models
"""
parts = message.text.split(None, 2)
if len(parts) < 2 or not parts[1].strip():
await bot.reply(message, "Usage: !chat <message> | clear | model [name] | models")
return
sub = parts[1].strip().lower()
# -- Subcommands ---------------------------------------------------------
if sub == "clear":
ps = _ps(bot)
nick = message.nick
if nick in ps["histories"]:
del ps["histories"][nick]
await bot.reply(message, "Conversation cleared")
return
if sub == "model":
if len(parts) > 2 and parts[2].strip():
new_model = parts[2].strip()
_ps(bot)["model"] = new_model
await bot.reply(message, f"Model set to: {new_model}")
else:
await bot.reply(message, f"Current model: {_get_model(bot)}")
return
if sub == "models":
models = [
"openrouter/auto -- auto-route to best available",
"google/gemma-3-27b-it:free",
"meta-llama/llama-3.3-70b-instruct:free",
"deepseek/deepseek-r1:free",
"qwen/qwen3-235b-a22b:free",
"mistralai/mistral-small-3.1-24b-instruct:free",
]
await bot.long_reply(message, models, label="models")
return
# -- Chat path -----------------------------------------------------------
api_key = _get_api_key(bot)
if not api_key:
await bot.reply(message, "OpenRouter API key not configured")
return
nick = message.nick
if _check_cooldown(bot, nick):
await bot.reply(message, "Cooldown -- wait a few seconds")
return
# Reconstruct full user text (sub might be part of the message)
user_text = message.text.split(None, 1)[1].strip()
ps = _ps(bot)
history = ps["histories"].setdefault(nick, [])
# Build messages
system = _get_system_prompt(bot)
history.append({"role": "user", "content": user_text})
# Cap history
if len(history) > _MAX_HISTORY:
history[:] = history[-_MAX_HISTORY:]
messages = [{"role": "system", "content": system}] + history
model = _get_model(bot)
_set_cooldown(bot, nick)
loop = asyncio.get_running_loop()
try:
data = await loop.run_in_executor(
None, _chat_request, api_key, model, messages,
)
except urllib.error.HTTPError as exc:
# Remove the failed user message from history
history.pop()
if exc.code == 429:
await bot.reply(message, "Rate limited by OpenRouter -- try again later")
else:
await bot.reply(message, f"API error: HTTP {exc.code}")
return
except Exception as exc:
history.pop()
_log.warning("LLM request failed: %s", exc)
await bot.reply(message, f"Request failed: {exc}")
return
reply = _extract_reply(data)
if not reply:
history.pop()
await bot.reply(message, "No response from model")
return
# Store assistant reply in history
history.append({"role": "assistant", "content": reply})
if len(history) > _MAX_HISTORY:
history[:] = history[-_MAX_HISTORY:]
lines = _truncate(reply).split("\n")
await bot.long_reply(message, lines, label="llm")