"""Plugin: LLM chat via OpenRouter.""" from __future__ import annotations import asyncio import json import logging import os import time import urllib.request from derp.http import urlopen as _urlopen from derp.plugin import command _log = logging.getLogger(__name__) # -- Constants --------------------------------------------------------------- _API_URL = "https://openrouter.ai/api/v1/chat/completions" _DEFAULT_MODEL = "openrouter/auto" _TIMEOUT = 30 _MAX_HISTORY = 20 _MAX_REPLY_LEN = 400 _COOLDOWN = 5 _DEFAULT_SYSTEM = ( "You are a helpful IRC bot assistant. Keep responses concise and under 200 words." ) # -- Per-bot runtime state --------------------------------------------------- def _ps(bot): """Per-bot plugin runtime state.""" return bot._pstate.setdefault("llm", { "histories": {}, # {nick: [{"role": ..., "content": ...}, ...]} "cooldowns": {}, # {nick: monotonic_ts} "model": "", # override per-bot; empty = use default }) # -- Helpers ----------------------------------------------------------------- def _get_api_key(bot) -> str: """Resolve API key from env or config.""" return ( os.environ.get("OPENROUTER_API_KEY", "") or bot.config.get("openrouter", {}).get("api_key", "") ) def _get_model(bot) -> str: """Resolve current model.""" ps = _ps(bot) return ( ps["model"] or bot.config.get("openrouter", {}).get("model", "") or _DEFAULT_MODEL ) def _get_system_prompt(bot) -> str: """Resolve system prompt from config or default.""" return bot.config.get("openrouter", {}).get("system_prompt", _DEFAULT_SYSTEM) def _truncate(text: str, max_len: int = _MAX_REPLY_LEN) -> str: """Truncate text with ellipsis if needed.""" if len(text) <= max_len: return text return text[: max_len - 3].rstrip() + "..." 
def _check_cooldown(bot, nick: str) -> bool:
    """Return True if *nick* is still within the cooldown period."""
    last = _ps(bot)["cooldowns"].get(nick, 0)
    return (time.monotonic() - last) < _COOLDOWN


def _set_cooldown(bot, nick: str) -> None:
    """Record a cooldown timestamp for *nick*."""
    _ps(bot)["cooldowns"][nick] = time.monotonic()


# -- Blocking HTTP call ------------------------------------------------------


def _chat_request(api_key: str, model: str, messages: list[dict]) -> dict:
    """Blocking OpenRouter chat completion. Run via executor.

    Returns the parsed JSON response dict. Raises on HTTP or connection
    errors.
    """
    payload = json.dumps({
        "model": model,
        "messages": messages,
    }).encode()
    req = urllib.request.Request(_API_URL, data=payload, method="POST")
    req.add_header("Authorization", f"Bearer {api_key}")
    req.add_header("Content-Type", "application/json")
    resp = _urlopen(req, timeout=_TIMEOUT)
    try:
        raw = resp.read()
    finally:
        # Close even when read() raises so the connection is not leaked.
        resp.close()
    return json.loads(raw)


def _extract_reply(data: dict) -> str:
    """Extract reply text from an OpenRouter response.

    Handles reasoning models that return content="" with a reasoning field.
    """
    choices = data.get("choices", [])
    if not choices:
        return ""
    msg = choices[0].get("message", {})
    content = (msg.get("content") or "").strip()
    if content:
        return content
    # Fallback for reasoning models
    return (msg.get("reasoning") or "").strip()


async def _complete(bot, message, api_key: str, model: str, messages: list[dict]):
    """Run _chat_request in an executor; reply with an error and return None on failure."""
    loop = asyncio.get_running_loop()
    try:
        return await loop.run_in_executor(
            None, _chat_request, api_key, model, messages,
        )
    except urllib.error.HTTPError as exc:
        if exc.code == 429:
            await bot.reply(message, "Rate limited by OpenRouter -- try again later")
        else:
            await bot.reply(message, f"API error: HTTP {exc.code}")
        return None
    except Exception as exc:
        _log.warning("LLM request failed: %s", exc)
        await bot.reply(message, f"Request failed: {exc}")
        return None


# -- Command handlers --------------------------------------------------------


@command("ask", help="Ask: !ask <question>")
async def cmd_ask(bot, message):
    """Single-shot LLM question (no history).

    Usage: !ask <question>
    """
    parts = message.text.split(None, 1)
    if len(parts) < 2 or not parts[1].strip():
        await bot.reply(message, "Usage: !ask <question>")
        return

    api_key = _get_api_key(bot)
    if not api_key:
        await bot.reply(message, "OpenRouter API key not configured")
        return

    nick = message.nick
    if _check_cooldown(bot, nick):
        await bot.reply(message, "Cooldown -- wait a few seconds")
        return

    prompt = parts[1].strip()
    messages = [
        {"role": "system", "content": _get_system_prompt(bot)},
        {"role": "user", "content": prompt},
    ]
    # Set the cooldown before the request so retries can't be spammed
    # while a slow request is in flight.
    _set_cooldown(bot, nick)

    data = await _complete(bot, message, api_key, _get_model(bot), messages)
    if data is None:
        return  # _complete already replied with the error

    reply = _extract_reply(data)
    if not reply:
        await bot.reply(message, "No response from model")
        return

    lines = _truncate(reply).split("\n")
    await bot.long_reply(message, lines, label="llm")


@command("chat", help="Chat: !chat <message> | clear | model [name] | models")
async def cmd_chat(bot, message):
    """Conversational LLM chat with per-user history.

    Usage:
        !chat <message>     Send a message (maintains history)
        !chat clear         Clear your conversation history
        !chat model         Show current model
        !chat model <name>  Switch model
        !chat models        List popular free models
    """
    parts = message.text.split(None, 2)
    if len(parts) < 2 or not parts[1].strip():
        await bot.reply(message, "Usage: !chat <message> | clear | model [name] | models")
        return

    sub = parts[1].strip().lower()

    # -- Subcommands ---------------------------------------------------------

    if sub == "clear":
        _ps(bot)["histories"].pop(message.nick, None)
        await bot.reply(message, "Conversation cleared")
        return

    if sub == "model":
        if len(parts) > 2 and parts[2].strip():
            new_model = parts[2].strip()
            _ps(bot)["model"] = new_model
            await bot.reply(message, f"Model set to: {new_model}")
        else:
            await bot.reply(message, f"Current model: {_get_model(bot)}")
        return

    if sub == "models":
        models = [
            "openrouter/auto -- auto-route to best available",
            "google/gemma-3-27b-it:free",
            "meta-llama/llama-3.3-70b-instruct:free",
            "deepseek/deepseek-r1:free",
            "qwen/qwen3-235b-a22b:free",
            "mistralai/mistral-small-3.1-24b-instruct:free",
        ]
        await bot.long_reply(message, models, label="models")
        return

    # -- Chat path -----------------------------------------------------------

    api_key = _get_api_key(bot)
    if not api_key:
        await bot.reply(message, "OpenRouter API key not configured")
        return

    nick = message.nick
    if _check_cooldown(bot, nick):
        await bot.reply(message, "Cooldown -- wait a few seconds")
        return

    # Reconstruct full user text (sub was only the first word of the message)
    user_text = message.text.split(None, 1)[1].strip()

    history = _ps(bot)["histories"].setdefault(nick, [])
    history.append({"role": "user", "content": user_text})
    # Cap history before building the request
    if len(history) > _MAX_HISTORY:
        history[:] = history[-_MAX_HISTORY:]

    messages = [{"role": "system", "content": _get_system_prompt(bot)}] + history
    _set_cooldown(bot, nick)

    data = await _complete(bot, message, api_key, _get_model(bot), messages)
    if data is None:
        # Drop the user message that produced the failed request.
        history.pop()
        return

    reply = _extract_reply(data)
    if not reply:
        history.pop()
        await bot.reply(message, "No response from model")
        return

    # Store assistant reply in history, re-capping afterwards
    history.append({"role": "assistant", "content": reply})
    if len(history) > _MAX_HISTORY:
        history[:] = history[-_MAX_HISTORY:]

    lines = _truncate(reply).split("\n")
    await bot.long_reply(message, lines, label="llm")