feat: add OpenRouter LLM chat plugin (!ask, !chat)
Single-shot (!ask) and conversational (!chat) LLM commands backed by OpenRouter's API. Per-user history (20-message cap), a 5-second per-user cooldown, a fallback for reasoning models that return empty content, and model switching via subcommands.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
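Example config (a sketch; the file format is assumed here, but the plugin only reads
an "openrouter" mapping with these keys, or OPENROUTER_API_KEY from the environment):

    [openrouter]
    api_key = "sk-or-..."        # or export OPENROUTER_API_KEY instead
    model = "openrouter/auto"    # optional; falls back to the built-in default
    system_prompt = "..."        # optional; overrides the default prompt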
plugins/llm.py (new file, 298 additions)
@@ -0,0 +1,298 @@
"""Plugin: LLM chat via OpenRouter."""

from __future__ import annotations

import asyncio
import json
import logging
import os
import time
import urllib.error  # HTTPError, caught by the command handlers below
import urllib.request

from derp.http import urlopen as _urlopen
from derp.plugin import command

_log = logging.getLogger(__name__)

# -- Constants ---------------------------------------------------------------

_API_URL = "https://openrouter.ai/api/v1/chat/completions"
_DEFAULT_MODEL = "openrouter/auto"
_TIMEOUT = 30
_MAX_HISTORY = 20
_MAX_REPLY_LEN = 400
_COOLDOWN = 5

_DEFAULT_SYSTEM = (
    "You are a helpful IRC bot assistant. Keep responses concise and under 200 words."
)


# -- Per-bot runtime state ---------------------------------------------------

def _ps(bot):
    """Per-bot plugin runtime state."""
    return bot._pstate.setdefault("llm", {
        "histories": {},  # {nick: [{"role": ..., "content": ...}, ...]}
        "cooldowns": {},  # {nick: monotonic_ts}
        "model": "",  # override per-bot; empty = use default
    })
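
# (bot._pstate is assumed to be a per-bot dict the plugin framework provides
# for plugins to stash runtime state in, keyed by plugin name.)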


# -- Helpers -----------------------------------------------------------------

def _get_api_key(bot) -> str:
    """Resolve API key from env or config."""
    return (
        os.environ.get("OPENROUTER_API_KEY", "")
        or bot.config.get("openrouter", {}).get("api_key", "")
    )


def _get_model(bot) -> str:
    """Resolve current model."""
    ps = _ps(bot)
    return (
        ps["model"]
        or bot.config.get("openrouter", {}).get("model", "")
        or _DEFAULT_MODEL
    )


def _get_system_prompt(bot) -> str:
    """Resolve system prompt from config or default."""
    return bot.config.get("openrouter", {}).get("system_prompt", _DEFAULT_SYSTEM)


def _truncate(text: str, max_len: int = _MAX_REPLY_LEN) -> str:
    """Truncate text with ellipsis if needed."""
    if len(text) <= max_len:
        return text
    return text[: max_len - 3].rstrip() + "..."
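
# e.g. _truncate("a" * 500) -> "a" * 397 + "..." (400 chars total); the rstrip()
# can make the result slightly shorter if the cut lands on whitespace.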


def _check_cooldown(bot, nick: str) -> bool:
    """Return True if the user is within cooldown period."""
    ps = _ps(bot)
    last = ps["cooldowns"].get(nick, 0)
    return (time.monotonic() - last) < _COOLDOWN


def _set_cooldown(bot, nick: str) -> None:
    """Record a cooldown timestamp for a user."""
    _ps(bot)["cooldowns"][nick] = time.monotonic()


# -- Blocking HTTP call ------------------------------------------------------

def _chat_request(api_key: str, model: str, messages: list[dict]) -> dict:
    """Blocking OpenRouter chat completion. Run via executor.

    Returns the parsed JSON response dict.
    Raises on HTTP or connection errors.
    """
    payload = json.dumps({
        "model": model,
        "messages": messages,
    }).encode()

    req = urllib.request.Request(_API_URL, data=payload, method="POST")
    req.add_header("Authorization", f"Bearer {api_key}")
    req.add_header("Content-Type", "application/json")

    resp = _urlopen(req, timeout=_TIMEOUT)
    try:
        raw = resp.read()
    finally:
        # Close even if read() raises, so the connection isn't leaked
        resp.close()

    return json.loads(raw)
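
# Illustrative request body this builds (a sketch, not the full OpenRouter schema):
#   {"model": "openrouter/auto",
#    "messages": [{"role": "system", "content": "You are ..."},
#                 {"role": "user", "content": "hello"}]}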


def _extract_reply(data: dict) -> str:
    """Extract reply text from OpenRouter response.

    Handles reasoning models that return content="" with a reasoning field.
    """
    choices = data.get("choices", [])
    if not choices:
        return ""

    msg = choices[0].get("message", {})
    content = (msg.get("content") or "").strip()
    if content:
        return content

    # Fallback for reasoning models
    reasoning = (msg.get("reasoning") or "").strip()
    return reasoning
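
# Illustrative response shapes (sketches; only the fields read above):
#   normal model:    {"choices": [{"message": {"content": "Hello!"}}]}
#   reasoning model: {"choices": [{"message": {"content": "", "reasoning": "..."}}]}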


# -- Command handlers --------------------------------------------------------

@command("ask", help="Ask: !ask <question>")
async def cmd_ask(bot, message):
    """Single-shot LLM question (no history).

    Usage: !ask <question>
    """
    parts = message.text.split(None, 1)
    if len(parts) < 2 or not parts[1].strip():
        await bot.reply(message, "Usage: !ask <question>")
        return

    api_key = _get_api_key(bot)
    if not api_key:
        await bot.reply(message, "OpenRouter API key not configured")
        return

    nick = message.nick
    if _check_cooldown(bot, nick):
        await bot.reply(message, "Cooldown -- wait a few seconds")
        return

    prompt = parts[1].strip()
    model = _get_model(bot)
    system = _get_system_prompt(bot)
    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": prompt},
    ]

    _set_cooldown(bot, nick)

    loop = asyncio.get_running_loop()
    try:
        data = await loop.run_in_executor(
            None, _chat_request, api_key, model, messages,
        )
    except urllib.error.HTTPError as exc:
        if exc.code == 429:
            await bot.reply(message, "Rate limited by OpenRouter -- try again later")
        else:
            await bot.reply(message, f"API error: HTTP {exc.code}")
        return
    except Exception as exc:
        _log.warning("LLM request failed: %s", exc)
        await bot.reply(message, f"Request failed: {exc}")
        return

    reply = _extract_reply(data)
    if not reply:
        await bot.reply(message, "No response from model")
        return

    lines = _truncate(reply).split("\n")
    await bot.long_reply(message, lines, label="llm")
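
# Example (illustrative transcript; actual reply formatting depends on bot.reply):
#   <alice> !ask what port does IRC use by default?
#   <bot>   6667 for plaintext, 6697 for TLS.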


@command("chat", help="Chat: !chat <msg> | clear | model [name] | models")
async def cmd_chat(bot, message):
    """Conversational LLM chat with per-user history.

    Usage:
        !chat <message>     Send a message (maintains history)
        !chat clear         Clear your conversation history
        !chat model         Show current model
        !chat model <name>  Switch model
        !chat models        List popular free models
    """
    parts = message.text.split(None, 2)
    if len(parts) < 2 or not parts[1].strip():
        await bot.reply(message, "Usage: !chat <message> | clear | model [name] | models")
        return

    sub = parts[1].strip().lower()

    # -- Subcommands ---------------------------------------------------------

    if sub == "clear":
        ps = _ps(bot)
        nick = message.nick
        if nick in ps["histories"]:
            del ps["histories"][nick]
        await bot.reply(message, "Conversation cleared")
        return

    if sub == "model":
        if len(parts) > 2 and parts[2].strip():
            new_model = parts[2].strip()
            _ps(bot)["model"] = new_model
            await bot.reply(message, f"Model set to: {new_model}")
        else:
            await bot.reply(message, f"Current model: {_get_model(bot)}")
        return

    if sub == "models":
        models = [
            "openrouter/auto -- auto-route to best available",
            "google/gemma-3-27b-it:free",
            "meta-llama/llama-3.3-70b-instruct:free",
            "deepseek/deepseek-r1:free",
            "qwen/qwen3-235b-a22b:free",
            "mistralai/mistral-small-3.1-24b-instruct:free",
        ]
        await bot.long_reply(message, models, label="models")
        return

    # -- Chat path -----------------------------------------------------------

    api_key = _get_api_key(bot)
    if not api_key:
        await bot.reply(message, "OpenRouter API key not configured")
        return

    nick = message.nick
    if _check_cooldown(bot, nick):
        await bot.reply(message, "Cooldown -- wait a few seconds")
        return

    # Not a subcommand -- recover the full user text after the command name
    # (the earlier split capped at 2 fields and would have chopped it)
    user_text = message.text.split(None, 1)[1].strip()

    ps = _ps(bot)
    history = ps["histories"].setdefault(nick, [])

    # Build messages
    system = _get_system_prompt(bot)
    history.append({"role": "user", "content": user_text})

    # Cap history
    if len(history) > _MAX_HISTORY:
        history[:] = history[-_MAX_HISTORY:]

    messages = [{"role": "system", "content": system}] + history

    model = _get_model(bot)
    _set_cooldown(bot, nick)

    loop = asyncio.get_running_loop()
    try:
        data = await loop.run_in_executor(
            None, _chat_request, api_key, model, messages,
        )
    except urllib.error.HTTPError as exc:
        # Remove the failed user message from history
        history.pop()
        if exc.code == 429:
            await bot.reply(message, "Rate limited by OpenRouter -- try again later")
        else:
            await bot.reply(message, f"API error: HTTP {exc.code}")
        return
    except Exception as exc:
        history.pop()
        _log.warning("LLM request failed: %s", exc)
        await bot.reply(message, f"Request failed: {exc}")
        return

    reply = _extract_reply(data)
    if not reply:
        history.pop()
        await bot.reply(message, "No response from model")
        return

    # Store assistant reply in history
    history.append({"role": "assistant", "content": reply})
    if len(history) > _MAX_HISTORY:
        history[:] = history[-_MAX_HISTORY:]

    lines = _truncate(reply).split("\n")
    await bot.long_reply(message, lines, label="llm")
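
# Example session (illustrative; reply prefixes depend on bot.reply/long_reply):
#   <alice> !chat remember the number 42
#   <bot>   Got it -- 42.
#   <alice> !chat what number did I ask you to remember?
#   <bot>   42.
#   <alice> !chat clear
#   <bot>   Conversation cleared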