From 3b361916faae754b70eb0a2768dbedf54587caef Mon Sep 17 00:00:00 2001 From: Username Date: Sun, 28 Dec 2025 15:18:42 +0100 Subject: [PATCH] fetch, dbs: minor refactoring --- dbs.py | 2 +- fetch.py | 25 +------------------------ 2 files changed, 2 insertions(+), 25 deletions(-) diff --git a/dbs.py b/dbs.py index bb72b9e..f7d16d7 100644 --- a/dbs.py +++ b/dbs.py @@ -693,7 +693,7 @@ def get_database_stats(sqlite): row = sqlite.execute('SELECT COUNT(*) FROM proxylist').fetchone() stats['proxy_count'] = row[0] if row else 0 - row = sqlite.execute('SELECT COUNT(*) FROM proxylist WHERE failed=0').fetchone() + row = sqlite.execute('SELECT COUNT(*) FROM proxylist WHERE failed=0 AND tested IS NOT NULL').fetchone() stats['working_count'] = row[0] if row else 0 row = sqlite.execute('SELECT COUNT(*) FROM uris').fetchone() diff --git a/fetch.py b/fetch.py index cb3b7f5..fc5439f 100644 --- a/fetch.py +++ b/fetch.py @@ -5,7 +5,7 @@ import rocksock import network_stats from http2 import RsHttp, _parse_url from soup_parser import soupify -from misc import _log +from misc import _log, tor_proxy_url config = None @@ -14,14 +14,6 @@ _proxy_valid_cache = {} _proxy_valid_cache_max = 10000 -def tor_proxy_url(torhost): - """Generate Tor SOCKS5 proxy URL with random credentials for circuit isolation.""" - chars = string.ascii_lowercase + string.digits - user = ''.join(random.choice(chars) for _ in range(8)) - passwd = ''.join(random.choice(chars) for _ in range(8)) - return 'socks5://%s:%s@%s' % (user, passwd, torhost) - - class FetchSession(object): """Reusable fetch session with persistent Tor circuit. @@ -927,18 +919,3 @@ def extract_proxies(content, proxydb=None, filter_known=True, proto=None): add_known_proxies([p]) return len(uniques), new - -def extract_urls(content, urls = None, urignore=None): - urls = [] if not urls else urls - soup = soupify(content) - for a in soup.body.find_all('a'): - if not 'rel' in a.attrs or not 'noreferrer' in a.attrs['rel'] or a.attrs['href'] in urls: continue - bad = False - href = a.attrs['href'] - for i in urignore: - if re.findall(i, href): - bad = True - break - if not bad: urls.append(href) - return urls -