fetch, dbs: minor refactoring
This commit is contained in:
2
dbs.py
2
dbs.py
@@ -693,7 +693,7 @@ def get_database_stats(sqlite):
|
|||||||
row = sqlite.execute('SELECT COUNT(*) FROM proxylist').fetchone()
|
row = sqlite.execute('SELECT COUNT(*) FROM proxylist').fetchone()
|
||||||
stats['proxy_count'] = row[0] if row else 0
|
stats['proxy_count'] = row[0] if row else 0
|
||||||
|
|
||||||
row = sqlite.execute('SELECT COUNT(*) FROM proxylist WHERE failed=0').fetchone()
|
row = sqlite.execute('SELECT COUNT(*) FROM proxylist WHERE failed=0 AND tested IS NOT NULL').fetchone()
|
||||||
stats['working_count'] = row[0] if row else 0
|
stats['working_count'] = row[0] if row else 0
|
||||||
|
|
||||||
row = sqlite.execute('SELECT COUNT(*) FROM uris').fetchone()
|
row = sqlite.execute('SELECT COUNT(*) FROM uris').fetchone()
|
||||||
|
|||||||
25
fetch.py
25
fetch.py
@@ -5,7 +5,7 @@ import rocksock
|
|||||||
import network_stats
|
import network_stats
|
||||||
from http2 import RsHttp, _parse_url
|
from http2 import RsHttp, _parse_url
|
||||||
from soup_parser import soupify
|
from soup_parser import soupify
|
||||||
from misc import _log
|
from misc import _log, tor_proxy_url
|
||||||
|
|
||||||
config = None
|
config = None
|
||||||
|
|
||||||
@@ -14,14 +14,6 @@ _proxy_valid_cache = {}
|
|||||||
_proxy_valid_cache_max = 10000
|
_proxy_valid_cache_max = 10000
|
||||||
|
|
||||||
|
|
||||||
def tor_proxy_url(torhost):
    """Return a SOCKS5 proxy URL for torhost with throwaway credentials.

    A new 8-character user name and 8-character password, drawn from
    lowercase ASCII letters and digits, are generated on every call
    (random credentials are used for Tor circuit isolation).
    """
    alphabet = string.ascii_lowercase + string.digits
    # First entry is the user name, second the password.
    credentials = []
    for _field in range(2):
        picked = [random.choice(alphabet) for _n in range(8)]
        credentials.append(''.join(picked))
    return 'socks5://%s:%s@%s' % (credentials[0], credentials[1], torhost)
|
|
||||||
|
|
||||||
|
|
||||||
class FetchSession(object):
|
class FetchSession(object):
|
||||||
"""Reusable fetch session with persistent Tor circuit.
|
"""Reusable fetch session with persistent Tor circuit.
|
||||||
|
|
||||||
@@ -927,18 +919,3 @@ def extract_proxies(content, proxydb=None, filter_known=True, proto=None):
|
|||||||
add_known_proxies([p])
|
add_known_proxies([p])
|
||||||
|
|
||||||
return len(uniques), new
|
return len(uniques), new
|
||||||
|
|
||||||
def extract_urls(content, urls=None, urignore=None):
    """Collect hrefs of anchors whose ``rel`` attribute contains 'noreferrer'.

    content:  markup passed to soupify().
    urls:     optional list of already collected URLs.  A non-empty list
              is extended in place and returned; otherwise a new list is
              created.
    urignore: optional iterable of regular expressions; an href matching
              any of them (re.search) is skipped.  None or empty means no
              filtering.

    Returns the list of URLs.
    """
    urls = urls if urls else []
    # The default urignore=None used to be iterated directly (TypeError).
    patterns = urignore if urignore else ()

    soup = soupify(content)
    # NOTE(review): assumes soupify() yields None for .body when the
    # document has no <body> (BeautifulSoup-style) -- confirm.
    body = soup.body
    if body is None:
        return urls

    for a in body.find_all('a'):
        attrs = a.attrs
        if 'rel' not in attrs or 'noreferrer' not in attrs['rel']:
            continue
        # An anchor may carry rel= without href=; skip it instead of
        # raising KeyError.
        href = attrs.get('href')
        if href is None or href in urls:
            continue
        # re.search stops at the first hit; only match/no-match matters.
        if any(re.search(pattern, href) for pattern in patterns):
            continue
        urls.append(href)
    return urls
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user