Add fetch_url tool to agent

2026-04-07 17:43:39 +00:00
parent d299e394f0
commit d3ed3619c2
1 changed files with 77 additions and 0 deletions
--- a/agent/agent.py
+++ b/agent/agent.py
@@ -125,6 +125,23 @@ TOOLS = [
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "fetch_url",
            "description": "Fetch a URL and return its text content. HTML is stripped to plain text. Use this to read web pages, documentation, articles, etc.",
            "parameters": {
                "type": "object",
                "properties": {
                    "url": {
                        "type": "string",
                        "description": "The URL to fetch",
                    },
                },
                "required": ["url"],
            },
        },
    },
 ]
 SEARX_URL = CONFIG.get("searx_url", "https://searx.mymx.me")
@@ -268,6 +285,55 @@ def web_search(query, num_results=5):
        return f"[search error: {e}]"
 def fetch_url(url, *, max_bytes=50_000, max_chars=3000, timeout=15):
    """Fetch an http(s) URL and return its content as plain text.

    HTML responses are reduced to their text: the contents of
    script/style/noscript elements are dropped and block-level tags and
    <br> become line breaks. Any other content type is returned as decoded
    text. Runs of blank lines are collapsed and the result is cut to
    ``max_chars`` characters with a "[truncated]" marker.

    Args:
        url: Address to fetch. Only http and https are accepted, because
            urllib would otherwise also open file:, ftp: and data: URLs and
            the address comes from model output.
        max_bytes: Upper bound on the number of response bytes read.
        max_chars: Upper bound on the length of the returned text.
        timeout: Socket timeout in seconds passed to urlopen.

    Returns:
        The extracted text, "[empty page]" when nothing is left after
        cleanup, or "[fetch error: ...]" on any failure. Never raises.
    """
    import re
    from html.parser import HTMLParser
    from urllib.parse import urlsplit

    skipped_tags = ("script", "style", "noscript")
    break_after_tags = ("p", "div", "h1", "h2", "h3", "h4", "li", "tr")

    class TextExtractor(HTMLParser):
        """Collect visible text fragments from an HTML document."""

        def __init__(self):
            super().__init__()
            self.text = []
            # Depth counter rather than a flag, so a </style> nested inside
            # <noscript> does not re-enable output too early.
            self._skip_depth = 0

        def handle_starttag(self, tag, attrs):
            if tag in skipped_tags:
                self._skip_depth += 1
            elif tag == "br":
                # <br> is a void element and never gets an end tag, so the
                # line break has to be emitted here.
                self.text.append("\n")

        def handle_endtag(self, tag):
            if tag in skipped_tags:
                self._skip_depth = max(0, self._skip_depth - 1)
            if tag in break_after_tags:
                self.text.append("\n")

        def handle_data(self, data):
            if not self._skip_depth:
                self.text.append(data)

    # Tool arguments come from the model and may be missing or mistyped.
    if not isinstance(url, str) or not url.strip():
        return "[fetch error: no URL given]"
    url = url.strip()
    log(f"Fetching: {url[:80]}")
    try:
        scheme = urlsplit(url).scheme.lower()
        if scheme not in ("http", "https"):
            return f"[fetch error: unsupported URL scheme '{scheme}']"
        req = urllib.request.Request(url, headers={"User-Agent": "fireclaw-agent"})
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            content_type = resp.headers.get("Content-Type", "")
            charset = resp.headers.get_content_charset() or "utf-8"
            body = resp.read(max_bytes)
        try:
            raw = body.decode(charset, errors="replace")
        except LookupError:
            # Server declared a charset Python does not know.
            raw = body.decode("utf-8", errors="replace")
        if "html" in content_type.lower():
            parser = TextExtractor()
            parser.feed(raw)
            text = "".join(parser.text)
        else:
            text = raw
        # Blank out whitespace-only lines first; otherwise indented markup
        # leaves "\n   \n" sequences that the collapse below cannot match.
        text = re.sub(r"\n[^\S\n]+(?=\n)", "\n", text)
        text = re.sub(r"\n{3,}", "\n\n", text).strip()
        if len(text) > max_chars:
            text = text[:max_chars] + "\n[truncated]"
        return text or "[empty page]"
    except Exception as e:
        # Best-effort tool: report the failure to the model instead of raising.
        return f"[fetch error: {e}]"
 def try_parse_tool_call(text):
    """Try to parse a text-based tool call from model output.
    Handles formats like:
@@ -353,6 +419,11 @@ def query_ollama(messages):
                    log(f"Tool call [{round_num+1}/{MAX_TOOL_ROUNDS}]: web_search({query[:60]})")
                    result = web_search(query, num)
                    messages.append({"role": "tool", "content": result})
                elif fn_name == "fetch_url":
                    url = fn_args.get("url", "")
                    log(f"Tool call [{round_num+1}/{MAX_TOOL_ROUNDS}]: fetch_url({url[:60]})")
                    result = fetch_url(url)
                    messages.append({"role": "tool", "content": result})
                else:
                    messages.append({
                        "role": "tool",
@@ -385,6 +456,11 @@ def query_ollama(messages):
                log(f"Text tool call [{round_num+1}/{MAX_TOOL_ROUNDS}]: web_search({query[:60]})")
                result = web_search(query, num)
                messages.append({"role": "user", "content": f"Search results:\n{result}\n\nNow respond to the user based on these results."})
            elif fn_name == "fetch_url":
                url = fn_args.get("url", "")
                log(f"Text tool call [{round_num+1}/{MAX_TOOL_ROUNDS}]: fetch_url({url[:60]})")
                result = fetch_url(url)
                messages.append({"role": "user", "content": f"Page content:\n{result}\n\nNow respond to the user based on this content."})
            payload["messages"] = messages
            continue
@@ -401,6 +477,7 @@ def build_messages(question, channel):
        system += "\n\nYou have access to tools:"
        system += "\n- run_command: Execute shell commands on your system."
        system += "\n- web_search: Search the web for current information."
        system += "\n- fetch_url: Fetch and read a web page's content."
        system += "\n- save_memory: Save important information to your persistent workspace."
        system += "\nUse tools when needed rather than guessing. Your workspace at /workspace persists across restarts."
    if AGENT_MEMORY and AGENT_MEMORY != "# Agent Memory":