Add fetch_url tool to agent
This commit is contained in:
@@ -125,6 +125,23 @@ TOOLS = [
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "fetch_url",
|
||||||
|
"description": "Fetch a URL and return its text content. HTML is stripped to plain text. Use this to read web pages, documentation, articles, etc.",
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"url": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The URL to fetch",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"required": ["url"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
SEARX_URL = CONFIG.get("searx_url", "https://searx.mymx.me")
|
SEARX_URL = CONFIG.get("searx_url", "https://searx.mymx.me")
|
||||||
@@ -268,6 +285,55 @@ def web_search(query, num_results=5):
|
|||||||
return f"[search error: {e}]"
|
return f"[search error: {e}]"
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_url(url):
    """Fetch a URL and return its content as plain text.

    Only ``http`` and ``https`` URLs are fetched; any other scheme (for
    example ``file:``) is rejected so a model-supplied URL cannot be used to
    read local files. At most 50,000 bytes of the response body are read.
    HTML responses are reduced to their text content; other content types
    are returned as decoded text. The result is capped at 3000 characters.

    Args:
        url: The URL to fetch.

    Returns:
        The extracted text, ``"[empty page]"`` when nothing remains after
        clean-up, or ``"[fetch error: ...]"`` when the fetch fails. This
        function does not raise for network or parsing failures.
    """
    # Tool arguments come from model output and may not be a string at all.
    url = "" if url is None else str(url).strip()
    log(f"Fetching: {url[:80]}")
    try:
        import re
        from html.parser import HTMLParser
        from urllib.parse import urlsplit

        # urlopen() also understands file:, ftp: and data: URLs; restrict
        # the tool to real web fetches.
        scheme = urlsplit(url).scheme.lower()
        if scheme not in ("http", "https"):
            return f"[fetch error: unsupported URL scheme: {scheme or 'none'}]"

        skip_tags = ("script", "style", "noscript")
        # Closing one of these tags ends a visual block, so emit a newline.
        block_end_tags = ("p", "div", "h1", "h2", "h3", "h4", "li", "tr")

        class TextExtractor(HTMLParser):
            """Collect visible text, skipping script/style/noscript content."""

            def __init__(self):
                super().__init__()
                self.text = []
                # Depth counter rather than a flag, so an inner </style>
                # inside <noscript> does not re-enable text collection.
                self._skip_depth = 0

            def handle_starttag(self, tag, attrs):
                if tag in skip_tags:
                    self._skip_depth += 1
                elif tag == "br":
                    # <br> is a void element and normally has no end tag,
                    # so the line break must be emitted here.
                    self.text.append("\n")

            def handle_endtag(self, tag):
                if tag in skip_tags:
                    if self._skip_depth:
                        self._skip_depth -= 1
                elif tag in block_end_tags:
                    self.text.append("\n")

            def handle_data(self, data):
                if not self._skip_depth:
                    self.text.append(data)

        req = urllib.request.Request(url, headers={"User-Agent": "fireclaw-agent"})
        with urllib.request.urlopen(req, timeout=15) as resp:
            content_type = resp.headers.get("Content-Type", "").lower()
            charset = resp.headers.get_content_charset() or "utf-8"
            body = resp.read(50_000)

        try:
            raw = body.decode(charset, errors="replace")
        except LookupError:
            # The server declared a charset Python does not know.
            raw = body.decode("utf-8", errors="replace")

        if "html" in content_type:
            parser = TextExtractor()
            parser.feed(raw)
            parser.close()  # flush any text still buffered by the parser
            text = "".join(parser.text)
        else:
            text = raw

        # Clean up whitespace: drop trailing blanks so lines that contain
        # only spaces/tabs count as empty, then collapse runs of blank lines.
        text = re.sub(r"[ \t\r\f\v]+\n", "\n", text)
        text = re.sub(r"\n{3,}", "\n\n", text).strip()

        if len(text) > 3000:
            text = text[:3000] + "\n[truncated]"
        return text or "[empty page]"
    except Exception as e:
        # Best-effort tool: report the failure to the model as text.
        return f"[fetch error: {e}]"
|
||||||
|
|
||||||
|
|
||||||
def try_parse_tool_call(text):
|
def try_parse_tool_call(text):
|
||||||
"""Try to parse a text-based tool call from model output.
|
"""Try to parse a text-based tool call from model output.
|
||||||
Handles formats like:
|
Handles formats like:
|
||||||
@@ -353,6 +419,11 @@ def query_ollama(messages):
|
|||||||
log(f"Tool call [{round_num+1}/{MAX_TOOL_ROUNDS}]: web_search({query[:60]})")
|
log(f"Tool call [{round_num+1}/{MAX_TOOL_ROUNDS}]: web_search({query[:60]})")
|
||||||
result = web_search(query, num)
|
result = web_search(query, num)
|
||||||
messages.append({"role": "tool", "content": result})
|
messages.append({"role": "tool", "content": result})
|
||||||
|
elif fn_name == "fetch_url":
|
||||||
|
url = fn_args.get("url", "")
|
||||||
|
log(f"Tool call [{round_num+1}/{MAX_TOOL_ROUNDS}]: fetch_url({url[:60]})")
|
||||||
|
result = fetch_url(url)
|
||||||
|
messages.append({"role": "tool", "content": result})
|
||||||
else:
|
else:
|
||||||
messages.append({
|
messages.append({
|
||||||
"role": "tool",
|
"role": "tool",
|
||||||
@@ -385,6 +456,11 @@ def query_ollama(messages):
|
|||||||
log(f"Text tool call [{round_num+1}/{MAX_TOOL_ROUNDS}]: web_search({query[:60]})")
|
log(f"Text tool call [{round_num+1}/{MAX_TOOL_ROUNDS}]: web_search({query[:60]})")
|
||||||
result = web_search(query, num)
|
result = web_search(query, num)
|
||||||
messages.append({"role": "user", "content": f"Search results:\n{result}\n\nNow respond to the user based on these results."})
|
messages.append({"role": "user", "content": f"Search results:\n{result}\n\nNow respond to the user based on these results."})
|
||||||
|
elif fn_name == "fetch_url":
|
||||||
|
url = fn_args.get("url", "")
|
||||||
|
log(f"Text tool call [{round_num+1}/{MAX_TOOL_ROUNDS}]: fetch_url({url[:60]})")
|
||||||
|
result = fetch_url(url)
|
||||||
|
messages.append({"role": "user", "content": f"Page content:\n{result}\n\nNow respond to the user based on this content."})
|
||||||
payload["messages"] = messages
|
payload["messages"] = messages
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -401,6 +477,7 @@ def build_messages(question, channel):
|
|||||||
system += "\n\nYou have access to tools:"
|
system += "\n\nYou have access to tools:"
|
||||||
system += "\n- run_command: Execute shell commands on your system."
|
system += "\n- run_command: Execute shell commands on your system."
|
||||||
system += "\n- web_search: Search the web for current information."
|
system += "\n- web_search: Search the web for current information."
|
||||||
|
system += "\n- fetch_url: Fetch and read a web page's content."
|
||||||
system += "\n- save_memory: Save important information to your persistent workspace."
|
system += "\n- save_memory: Save important information to your persistent workspace."
|
||||||
system += "\nUse tools when needed rather than guessing. Your workspace at /workspace persists across restarts."
|
system += "\nUse tools when needed rather than guessing. Your workspace at /workspace persists across restarts."
|
||||||
if AGENT_MEMORY and AGENT_MEMORY != "# Agent Memory":
|
if AGENT_MEMORY and AGENT_MEMORY != "# Agent Memory":
|
||||||
|
|||||||
Reference in New Issue
Block a user