Single-shot (!ask) and conversational (!chat) LLM commands backed by OpenRouter's API. Per-user history (20 msg cap), 5s cooldown, reasoning model fallback, and model switching via subcommands. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
299 lines
8.7 KiB
Python
299 lines
8.7 KiB
Python
"""Plugin: LLM chat via OpenRouter."""
|
|
|
|
from __future__ import annotations

import asyncio
import json
import logging
import os
import time
import urllib.error
import urllib.request

from derp.http import urlopen as _urlopen
from derp.plugin import command
|
|
|
|
# Module-level logger, named after this module so log output can be filtered.
_log = logging.getLogger(__name__)


# -- Constants ---------------------------------------------------------------

# OpenRouter chat-completions endpoint (OpenAI-compatible API).
_API_URL = "https://openrouter.ai/api/v1/chat/completions"
# Fallback model when neither per-bot state nor config names one.
_DEFAULT_MODEL = "openrouter/auto"
# Timeout in seconds for the blocking HTTP request.
_TIMEOUT = 30
# Maximum messages retained per user's !chat history (user + assistant turns).
_MAX_HISTORY = 20
# Replies longer than this many characters are truncated with an ellipsis.
_MAX_REPLY_LEN = 400
# Per-user cooldown in seconds between LLM requests.
_COOLDOWN = 5

# System prompt used when the config does not provide one.
_DEFAULT_SYSTEM = (
    "You are a helpful IRC bot assistant. Keep responses concise and under 200 words."
)
|
|
|
|
|
|
# -- Per-bot runtime state ---------------------------------------------------
|
|
|
|
def _ps(bot):
|
|
"""Per-bot plugin runtime state."""
|
|
return bot._pstate.setdefault("llm", {
|
|
"histories": {}, # {nick: [{"role": ..., "content": ...}, ...]}
|
|
"cooldowns": {}, # {nick: monotonic_ts}
|
|
"model": "", # override per-bot; empty = use default
|
|
})
|
|
|
|
|
|
# -- Helpers -----------------------------------------------------------------
|
|
|
|
def _get_api_key(bot) -> str:
|
|
"""Resolve API key from env or config."""
|
|
return (
|
|
os.environ.get("OPENROUTER_API_KEY", "")
|
|
or bot.config.get("openrouter", {}).get("api_key", "")
|
|
)
|
|
|
|
|
|
def _get_model(bot) -> str:
    """Resolve the active model: per-bot override, then config, then default."""
    candidates = (
        _ps(bot)["model"],
        bot.config.get("openrouter", {}).get("model", ""),
    )
    for name in candidates:
        if name:
            return name
    return _DEFAULT_MODEL
|
|
|
|
|
|
def _get_system_prompt(bot) -> str:
    """Resolve the system prompt from bot config, falling back to the default."""
    section = bot.config.get("openrouter", {})
    return section.get("system_prompt", _DEFAULT_SYSTEM)
|
|
|
|
|
|
def _truncate(text: str, max_len: int = _MAX_REPLY_LEN) -> str:
    """Truncate *text* to at most *max_len* characters, appending "..." if cut.

    Returns the text unchanged when it already fits. Trailing whitespace
    before the ellipsis is stripped so the result reads cleanly.
    """
    if len(text) <= max_len:
        return text
    if max_len <= 3:
        # No room for a 3-char ellipsis; a hard cut is the only way to
        # honor max_len (the old code returned "..." even for max_len < 3).
        return text[:max_len]
    return text[: max_len - 3].rstrip() + "..."
|
|
|
|
|
|
def _check_cooldown(bot, nick: str) -> bool:
    """Return True while *nick* is still inside the cooldown window."""
    last_request = _ps(bot)["cooldowns"].get(nick, 0)
    elapsed = time.monotonic() - last_request
    return elapsed < _COOLDOWN
|
|
|
|
|
|
def _set_cooldown(bot, nick: str) -> None:
    """Stamp *nick*'s cooldown with the current monotonic time."""
    state = _ps(bot)
    state["cooldowns"][nick] = time.monotonic()
|
|
|
|
|
|
# -- Blocking HTTP call ------------------------------------------------------
|
|
|
|
def _chat_request(api_key: str, model: str, messages: list[dict]) -> dict:
    """Blocking OpenRouter chat completion. Run via executor.

    Args:
        api_key: Bearer token for the OpenRouter API.
        model: Model identifier to request.
        messages: Chat messages in OpenAI format ({"role", "content"} dicts).

    Returns the parsed JSON response dict.
    Raises on HTTP or connection errors, or if the body is not valid JSON.
    """
    payload = json.dumps({
        "model": model,
        "messages": messages,
    }).encode()

    req = urllib.request.Request(_API_URL, data=payload, method="POST")
    req.add_header("Authorization", f"Bearer {api_key}")
    req.add_header("Content-Type", "application/json")

    # Close the response even if read() raises (e.g. connection reset mid-body);
    # the original code leaked the socket on that path.
    resp = _urlopen(req, timeout=_TIMEOUT)
    try:
        raw = resp.read()
    finally:
        resp.close()

    return json.loads(raw)
|
|
|
|
|
|
def _extract_reply(data: dict) -> str:
|
|
"""Extract reply text from OpenRouter response.
|
|
|
|
Handles reasoning models that return content="" with a reasoning field.
|
|
"""
|
|
choices = data.get("choices", [])
|
|
if not choices:
|
|
return ""
|
|
|
|
msg = choices[0].get("message", {})
|
|
content = (msg.get("content") or "").strip()
|
|
if content:
|
|
return content
|
|
|
|
# Fallback for reasoning models
|
|
reasoning = (msg.get("reasoning") or "").strip()
|
|
return reasoning
|
|
|
|
|
|
# -- Command handlers --------------------------------------------------------
|
|
|
|
@command("ask", help="Ask: !ask <question>")
|
|
async def cmd_ask(bot, message):
|
|
"""Single-shot LLM question (no history).
|
|
|
|
Usage: !ask <question>
|
|
"""
|
|
parts = message.text.split(None, 1)
|
|
if len(parts) < 2 or not parts[1].strip():
|
|
await bot.reply(message, "Usage: !ask <question>")
|
|
return
|
|
|
|
api_key = _get_api_key(bot)
|
|
if not api_key:
|
|
await bot.reply(message, "OpenRouter API key not configured")
|
|
return
|
|
|
|
nick = message.nick
|
|
if _check_cooldown(bot, nick):
|
|
await bot.reply(message, "Cooldown -- wait a few seconds")
|
|
return
|
|
|
|
prompt = parts[1].strip()
|
|
model = _get_model(bot)
|
|
system = _get_system_prompt(bot)
|
|
messages = [
|
|
{"role": "system", "content": system},
|
|
{"role": "user", "content": prompt},
|
|
]
|
|
|
|
_set_cooldown(bot, nick)
|
|
|
|
loop = asyncio.get_running_loop()
|
|
try:
|
|
data = await loop.run_in_executor(
|
|
None, _chat_request, api_key, model, messages,
|
|
)
|
|
except urllib.error.HTTPError as exc:
|
|
if exc.code == 429:
|
|
await bot.reply(message, "Rate limited by OpenRouter -- try again later")
|
|
else:
|
|
await bot.reply(message, f"API error: HTTP {exc.code}")
|
|
return
|
|
except Exception as exc:
|
|
_log.warning("LLM request failed: %s", exc)
|
|
await bot.reply(message, f"Request failed: {exc}")
|
|
return
|
|
|
|
reply = _extract_reply(data)
|
|
if not reply:
|
|
await bot.reply(message, "No response from model")
|
|
return
|
|
|
|
lines = _truncate(reply).split("\n")
|
|
await bot.long_reply(message, lines, label="llm")
|
|
|
|
|
|
@command("chat", help="Chat: !chat <msg> | clear | model [name] | models")
|
|
async def cmd_chat(bot, message):
|
|
"""Conversational LLM chat with per-user history.
|
|
|
|
Usage:
|
|
!chat <message> Send a message (maintains history)
|
|
!chat clear Clear your conversation history
|
|
!chat model Show current model
|
|
!chat model <name> Switch model
|
|
!chat models List popular free models
|
|
"""
|
|
parts = message.text.split(None, 2)
|
|
if len(parts) < 2 or not parts[1].strip():
|
|
await bot.reply(message, "Usage: !chat <message> | clear | model [name] | models")
|
|
return
|
|
|
|
sub = parts[1].strip().lower()
|
|
|
|
# -- Subcommands ---------------------------------------------------------
|
|
|
|
if sub == "clear":
|
|
ps = _ps(bot)
|
|
nick = message.nick
|
|
if nick in ps["histories"]:
|
|
del ps["histories"][nick]
|
|
await bot.reply(message, "Conversation cleared")
|
|
return
|
|
|
|
if sub == "model":
|
|
if len(parts) > 2 and parts[2].strip():
|
|
new_model = parts[2].strip()
|
|
_ps(bot)["model"] = new_model
|
|
await bot.reply(message, f"Model set to: {new_model}")
|
|
else:
|
|
await bot.reply(message, f"Current model: {_get_model(bot)}")
|
|
return
|
|
|
|
if sub == "models":
|
|
models = [
|
|
"openrouter/auto -- auto-route to best available",
|
|
"google/gemma-3-27b-it:free",
|
|
"meta-llama/llama-3.3-70b-instruct:free",
|
|
"deepseek/deepseek-r1:free",
|
|
"qwen/qwen3-235b-a22b:free",
|
|
"mistralai/mistral-small-3.1-24b-instruct:free",
|
|
]
|
|
await bot.long_reply(message, models, label="models")
|
|
return
|
|
|
|
# -- Chat path -----------------------------------------------------------
|
|
|
|
api_key = _get_api_key(bot)
|
|
if not api_key:
|
|
await bot.reply(message, "OpenRouter API key not configured")
|
|
return
|
|
|
|
nick = message.nick
|
|
if _check_cooldown(bot, nick):
|
|
await bot.reply(message, "Cooldown -- wait a few seconds")
|
|
return
|
|
|
|
# Reconstruct full user text (sub might be part of the message)
|
|
user_text = message.text.split(None, 1)[1].strip()
|
|
|
|
ps = _ps(bot)
|
|
history = ps["histories"].setdefault(nick, [])
|
|
|
|
# Build messages
|
|
system = _get_system_prompt(bot)
|
|
history.append({"role": "user", "content": user_text})
|
|
|
|
# Cap history
|
|
if len(history) > _MAX_HISTORY:
|
|
history[:] = history[-_MAX_HISTORY:]
|
|
|
|
messages = [{"role": "system", "content": system}] + history
|
|
|
|
model = _get_model(bot)
|
|
_set_cooldown(bot, nick)
|
|
|
|
loop = asyncio.get_running_loop()
|
|
try:
|
|
data = await loop.run_in_executor(
|
|
None, _chat_request, api_key, model, messages,
|
|
)
|
|
except urllib.error.HTTPError as exc:
|
|
# Remove the failed user message from history
|
|
history.pop()
|
|
if exc.code == 429:
|
|
await bot.reply(message, "Rate limited by OpenRouter -- try again later")
|
|
else:
|
|
await bot.reply(message, f"API error: HTTP {exc.code}")
|
|
return
|
|
except Exception as exc:
|
|
history.pop()
|
|
_log.warning("LLM request failed: %s", exc)
|
|
await bot.reply(message, f"Request failed: {exc}")
|
|
return
|
|
|
|
reply = _extract_reply(data)
|
|
if not reply:
|
|
history.pop()
|
|
await bot.reply(message, "No response from model")
|
|
return
|
|
|
|
# Store assistant reply in history
|
|
history.append({"role": "assistant", "content": reply})
|
|
if len(history) > _MAX_HISTORY:
|
|
history[:] = history[-_MAX_HISTORY:]
|
|
|
|
lines = _truncate(reply).split("\n")
|
|
await bot.long_reply(message, lines, label="llm")
|