Files
derp/plugins/llm.py
user 95981275b5 feat: add OpenRouter LLM chat plugin (!ask, !chat)
Single-shot (!ask) and conversational (!chat) LLM commands backed by
OpenRouter's API. Per-user history (20 msg cap), 5s cooldown, reasoning
model fallback, and model switching via subcommands.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-22 05:39:11 +01:00

299 lines
8.7 KiB
Python

"""Plugin: LLM chat via OpenRouter."""
from __future__ import annotations
import asyncio
import json
import logging
import os
import time
import urllib.error
import urllib.request
from derp.http import urlopen as _urlopen
from derp.plugin import command
_log = logging.getLogger(__name__)
# -- Constants ---------------------------------------------------------------
_API_URL = "https://openrouter.ai/api/v1/chat/completions"
_DEFAULT_MODEL = "openrouter/auto"
_TIMEOUT = 30
_MAX_HISTORY = 20
_MAX_REPLY_LEN = 400
_COOLDOWN = 5
_DEFAULT_SYSTEM = (
"You are a helpful IRC bot assistant. Keep responses concise and under 200 words."
)
# -- Per-bot runtime state ---------------------------------------------------
def _ps(bot):
"""Per-bot plugin runtime state."""
return bot._pstate.setdefault("llm", {
"histories": {}, # {nick: [{"role": ..., "content": ...}, ...]}
"cooldowns": {}, # {nick: monotonic_ts}
"model": "", # override per-bot; empty = use default
})
# -- Helpers -----------------------------------------------------------------
def _get_api_key(bot) -> str:
"""Resolve API key from env or config."""
return (
os.environ.get("OPENROUTER_API_KEY", "")
or bot.config.get("openrouter", {}).get("api_key", "")
)
def _get_model(bot) -> str:
    """Return the active model: per-bot override, then config, then default."""
    override = _ps(bot)["model"]
    if override:
        return override
    configured = bot.config.get("openrouter", {}).get("model", "")
    if configured:
        return configured
    return _DEFAULT_MODEL
def _get_system_prompt(bot) -> str:
    """Return the configured system prompt, falling back to the default."""
    section = bot.config.get("openrouter", {})
    return section.get("system_prompt", _DEFAULT_SYSTEM)
def _truncate(text: str, max_len: int = _MAX_REPLY_LEN) -> str:
    """Clip *text* to *max_len* characters, appending "..." when clipped."""
    if len(text) > max_len:
        # Reserve 3 chars for the ellipsis; strip trailing space at the cut.
        text = text[: max_len - 3].rstrip() + "..."
    return text
def _check_cooldown(bot, nick: str) -> bool:
    """Return True while *nick* is still inside the cooldown window."""
    elapsed = time.monotonic() - _ps(bot)["cooldowns"].get(nick, 0)
    return elapsed < _COOLDOWN
def _set_cooldown(bot, nick: str) -> None:
    """Stamp *nick* with the current monotonic time for cooldown tracking."""
    state = _ps(bot)
    state["cooldowns"][nick] = time.monotonic()
# -- Blocking HTTP call ------------------------------------------------------
def _chat_request(api_key: str, model: str, messages: list[dict]) -> dict:
    """Blocking OpenRouter chat completion. Run via executor.

    Args:
        api_key: OpenRouter bearer token.
        model: model identifier to request.
        messages: chat messages in OpenAI chat-completion format.

    Returns the parsed JSON response dict.
    Raises on HTTP or connection errors (urllib.error.*) and on a
    malformed JSON body (json.JSONDecodeError).
    """
    payload = json.dumps({
        "model": model,
        "messages": messages,
    }).encode()
    req = urllib.request.Request(_API_URL, data=payload, method="POST")
    req.add_header("Authorization", f"Bearer {api_key}")
    req.add_header("Content-Type", "application/json")
    resp = _urlopen(req, timeout=_TIMEOUT)
    try:
        # try/finally so the connection is released even if read() raises
        # mid-stream (previously the response leaked on a read error).
        raw = resp.read()
    finally:
        resp.close()
    return json.loads(raw)
def _extract_reply(data: dict) -> str:
"""Extract reply text from OpenRouter response.
Handles reasoning models that return content="" with a reasoning field.
"""
choices = data.get("choices", [])
if not choices:
return ""
msg = choices[0].get("message", {})
content = (msg.get("content") or "").strip()
if content:
return content
# Fallback for reasoning models
reasoning = (msg.get("reasoning") or "").strip()
return reasoning
# -- Command handlers --------------------------------------------------------
@command("ask", help="Ask: !ask <question>")
async def cmd_ask(bot, message):
"""Single-shot LLM question (no history).
Usage: !ask <question>
"""
parts = message.text.split(None, 1)
if len(parts) < 2 or not parts[1].strip():
await bot.reply(message, "Usage: !ask <question>")
return
api_key = _get_api_key(bot)
if not api_key:
await bot.reply(message, "OpenRouter API key not configured")
return
nick = message.nick
if _check_cooldown(bot, nick):
await bot.reply(message, "Cooldown -- wait a few seconds")
return
prompt = parts[1].strip()
model = _get_model(bot)
system = _get_system_prompt(bot)
messages = [
{"role": "system", "content": system},
{"role": "user", "content": prompt},
]
_set_cooldown(bot, nick)
loop = asyncio.get_running_loop()
try:
data = await loop.run_in_executor(
None, _chat_request, api_key, model, messages,
)
except urllib.error.HTTPError as exc:
if exc.code == 429:
await bot.reply(message, "Rate limited by OpenRouter -- try again later")
else:
await bot.reply(message, f"API error: HTTP {exc.code}")
return
except Exception as exc:
_log.warning("LLM request failed: %s", exc)
await bot.reply(message, f"Request failed: {exc}")
return
reply = _extract_reply(data)
if not reply:
await bot.reply(message, "No response from model")
return
lines = _truncate(reply).split("\n")
await bot.long_reply(message, lines, label="llm")
@command("chat", help="Chat: !chat <msg> | clear | model [name] | models")
async def cmd_chat(bot, message):
"""Conversational LLM chat with per-user history.
Usage:
!chat <message> Send a message (maintains history)
!chat clear Clear your conversation history
!chat model Show current model
!chat model <name> Switch model
!chat models List popular free models
"""
parts = message.text.split(None, 2)
if len(parts) < 2 or not parts[1].strip():
await bot.reply(message, "Usage: !chat <message> | clear | model [name] | models")
return
sub = parts[1].strip().lower()
# -- Subcommands ---------------------------------------------------------
if sub == "clear":
ps = _ps(bot)
nick = message.nick
if nick in ps["histories"]:
del ps["histories"][nick]
await bot.reply(message, "Conversation cleared")
return
if sub == "model":
if len(parts) > 2 and parts[2].strip():
new_model = parts[2].strip()
_ps(bot)["model"] = new_model
await bot.reply(message, f"Model set to: {new_model}")
else:
await bot.reply(message, f"Current model: {_get_model(bot)}")
return
if sub == "models":
models = [
"openrouter/auto -- auto-route to best available",
"google/gemma-3-27b-it:free",
"meta-llama/llama-3.3-70b-instruct:free",
"deepseek/deepseek-r1:free",
"qwen/qwen3-235b-a22b:free",
"mistralai/mistral-small-3.1-24b-instruct:free",
]
await bot.long_reply(message, models, label="models")
return
# -- Chat path -----------------------------------------------------------
api_key = _get_api_key(bot)
if not api_key:
await bot.reply(message, "OpenRouter API key not configured")
return
nick = message.nick
if _check_cooldown(bot, nick):
await bot.reply(message, "Cooldown -- wait a few seconds")
return
# Reconstruct full user text (sub might be part of the message)
user_text = message.text.split(None, 1)[1].strip()
ps = _ps(bot)
history = ps["histories"].setdefault(nick, [])
# Build messages
system = _get_system_prompt(bot)
history.append({"role": "user", "content": user_text})
# Cap history
if len(history) > _MAX_HISTORY:
history[:] = history[-_MAX_HISTORY:]
messages = [{"role": "system", "content": system}] + history
model = _get_model(bot)
_set_cooldown(bot, nick)
loop = asyncio.get_running_loop()
try:
data = await loop.run_in_executor(
None, _chat_request, api_key, model, messages,
)
except urllib.error.HTTPError as exc:
# Remove the failed user message from history
history.pop()
if exc.code == 429:
await bot.reply(message, "Rate limited by OpenRouter -- try again later")
else:
await bot.reply(message, f"API error: HTTP {exc.code}")
return
except Exception as exc:
history.pop()
_log.warning("LLM request failed: %s", exc)
await bot.reply(message, f"Request failed: {exc}")
return
reply = _extract_reply(data)
if not reply:
history.pop()
await bot.reply(message, "No response from model")
return
# Store assistant reply in history
history.append({"role": "assistant", "content": reply})
if len(history) > _MAX_HISTORY:
history[:] = history[-_MAX_HISTORY:]
lines = _truncate(reply).split("\n")
await bot.long_reply(message, lines, label="llm")