fetch, dbs: minor refactoring

This commit is contained in:
Username
2025-12-28 15:18:42 +01:00
parent 50bc13a615
commit 3b361916fa
2 changed files with 2 additions and 25 deletions

2
dbs.py
View File

@@ -693,7 +693,7 @@ def get_database_stats(sqlite):
row = sqlite.execute('SELECT COUNT(*) FROM proxylist').fetchone()
stats['proxy_count'] = row[0] if row else 0
row = sqlite.execute('SELECT COUNT(*) FROM proxylist WHERE failed=0').fetchone()
row = sqlite.execute('SELECT COUNT(*) FROM proxylist WHERE failed=0 AND tested IS NOT NULL').fetchone()
stats['working_count'] = row[0] if row else 0
row = sqlite.execute('SELECT COUNT(*) FROM uris').fetchone()

View File

@@ -5,7 +5,7 @@ import rocksock
import network_stats
from http2 import RsHttp, _parse_url
from soup_parser import soupify
from misc import _log
from misc import _log, tor_proxy_url
config = None
@@ -14,14 +14,6 @@ _proxy_valid_cache = {}
_proxy_valid_cache_max = 10000
def tor_proxy_url(torhost):
    """Build a SOCKS5 URL for the Tor endpoint *torhost* with throwaway credentials.

    A fresh random username and password (8 characters each, drawn from
    lowercase ASCII letters and digits) are embedded in the URL; per the
    original docstring this is used for Tor circuit isolation.

    torhost -- the ``host:port`` part placed after the ``@`` in the URL.

    Returns a string of the form ``socks5://<user>:<passwd>@<torhost>``.
    """
    alphabet = string.ascii_lowercase + string.digits

    def _token():
        # One 8-character random credential component.
        return ''.join([random.choice(alphabet) for _ in range(8)])

    # Order matters for reproducibility under a seeded RNG: user first,
    # password second.
    username = _token()
    password = _token()
    return 'socks5://%s:%s@%s' % (username, password, torhost)
class FetchSession(object):
"""Reusable fetch session with persistent Tor circuit.
@@ -927,18 +919,3 @@ def extract_proxies(content, proxydb=None, filter_known=True, proto=None):
add_known_proxies([p])
return len(uniques), new
def extract_urls(content, urls=None, urignore=None):
    """Collect the hrefs of ``rel="noreferrer"`` anchors found in *content*.

    content  -- markup handed to ``soupify`` for parsing.
    urls     -- optional list of already-known URLs; when non-empty it is
                extended in place and returned, otherwise a new list is used.
    urignore -- optional iterable of regex patterns; an href matching any
                of them (``re.search``) is skipped. ``None`` means no
                patterns.

    Returns the list of URLs, without duplicates of entries already in it.
    """
    urls = [] if not urls else urls
    # The default urignore=None used to be iterated directly, raising
    # TypeError; treat None as "no ignore patterns".
    patterns = urignore or ()
    soup = soupify(content)
    body = soup.body
    # NOTE(review): presumably soup.body is None when the document has no
    # <body> element -- return what we have instead of raising.
    if body is None:
        return urls
    for a in body.find_all('a'):
        attrs = a.attrs
        if 'noreferrer' not in (attrs.get('rel') or ()):
            continue
        # Anchors without an href previously raised KeyError; skip them.
        href = attrs.get('href')
        if href is None or href in urls:
            continue
        if any(re.search(pattern, href) for pattern in patterns):
            continue
        urls.append(href)
    return urls