From 0311abb46a12d7f8eef03f6672305fa1bf3a847d Mon Sep 17 00:00:00 2001 From: Username Date: Tue, 17 Feb 2026 16:43:26 +0100 Subject: [PATCH] fetch: encode unicode URLs to bytes before HTTP/SOCKS ops When URLs arrive as unicode (e.g. from JSON API responses), the unicode type propagates through _parse_url into the SOCKS5 packet construction in rocksock. Port bytes > 127 formatted via %c into a unicode string become non-ASCII characters, which then fail the implicit ASCII encode performed by socket sendall(). Encode URLs to UTF-8 bytes at the fetch entry points to keep the entire request pipeline in the str (bytes) domain. --- fetch.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fetch.py b/fetch.py index fead0e5..d57da6d 100644 --- a/fetch.py +++ b/fetch.py @@ -56,6 +56,8 @@ class FetchSession(object): def fetch(self, url, head=False): """Fetch URL, reusing connection if possible.""" network_stats.set_category('scraper') + if isinstance(url, unicode): + url = url.encode('utf-8') host, port, ssl, uri = _parse_url(url) # Check if we can reuse existing connection @@ -489,6 +491,8 @@ def fetch_contents(url, head=False, proxy=None): retry_messages = ('Engines cannot retrieve results', 'Rate limit exceeded') def _fetch_contents(url, head = False, proxy=None): network_stats.set_category('scraper') + if isinstance(url, unicode): + url = url.encode('utf-8') host, port, ssl, uri = _parse_url(url) headers=[ 'Accept-Language: en-US,en;q=0.8',