From e7b8d526c09b055a10c7e0db70570bf4381270ee Mon Sep 17 00:00:00 2001 From: rofl0r Date: Tue, 8 Jan 2019 00:46:41 +0000 Subject: [PATCH 01/13] ppf: print url if fetching failed --- ppf.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ppf.py b/ppf.py index 30c5809..93c03c8 100755 --- a/ppf.py +++ b/ppf.py @@ -43,7 +43,9 @@ def fetch_contents(url): ] proxies = [rocksock.RocksockProxyFromURL('socks4://%s' % random.choice( config.torhosts ))] http = RsHttp(host,ssl=ssl,port=port, keep_alive=True, timeout=15, max_tries=1, follow_redirects=True, auto_set_cookies=True, proxies=proxies, user_agent='Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0') - if not http.connect(): return '' + if not http.connect(): + _log("failed to connect to %s"%url, "ppf") + return '' hdr, res = http.get(uri, headers) res = res.encode('utf-8') if isinstance(res, unicode) else res for retry_message in retry_messages: From f16f754b0e9599e918d1dc27dc71a474327e17c2 Mon Sep 17 00:00:00 2001 From: rofl0r Date: Tue, 8 Jan 2019 02:17:04 +0000 Subject: [PATCH 02/13] implement combo config parser allows all options to be overridden by command line. e.g. 
[watchd] threads=10 debug=false --watchd.threads=50 --watchd.debug=true --- comboparse.py | 86 +++++++++++++++++++++++++++++++++++++++++++++++ config.ini.sample | 10 +++--- config.py | 66 ++++++++++++------------------------ ppf.py | 16 +++++---- proxywatchd.py | 20 ++++++----- 5 files changed, 131 insertions(+), 67 deletions(-) create mode 100644 comboparse.py diff --git a/comboparse.py b/comboparse.py new file mode 100644 index 0000000..911b882 --- /dev/null +++ b/comboparse.py @@ -0,0 +1,86 @@ +from ConfigParser import SafeConfigParser, NoOptionError +from argparse import ArgumentParser +import sys + +class _Dummy(): + pass + +class ComboParser(object): + def __init__(self, ini): + self.items = [] + self.cparser = SafeConfigParser() + self.aparser = ArgumentParser() + self.ini = ini + self.items = [] + self.loaded = False + + def add_item(self, section, name, type, default, desc, required): + self.items.append({ + 'section':section, + 'name':name, + 'type':type, + 'default':default, + 'required':required, + }) + self.aparser.add_argument( + '--%s.%s'%(section, name), + help='%s, default: (%s)'%(desc, str(default)), + type=type, + default=None, + required=False + ) + def load(self): + if self.loaded: return + self.loaded = True + + try: self.cparser.read(self.ini) + except: pass + args = self.aparser.parse_args() + for item in self.items: + try: + obj = getattr(self, item['section']) + except AttributeError: + setattr(self, item['section'], _Dummy()) + obj = getattr(self, item['section']) + + setattr(obj, item['name'], item['default']) + inner = getattr(obj, item['name']) + + item['found'] = True + try: + if item['type'] is bool : inner = self.cparser.getboolean(item['section'], item['name']) + elif item['type'] is float: inner = self.cparser.getfloat(item['section'], item['name']) + elif item['type'] is int : inner = self.cparser.getint(item['section'], item['name']) + elif item['type'] is str : inner = self.cparser.get(item['section'], item['name']) + except 
NoOptionError: + item['found'] = False + try: + arg = getattr(args, '%s.%s'%(item['section'], item['name'])) + if arg is not None: + inner = arg + item['found'] = True + except: pass + if not item['found']: + if item['required']: + sys.stderr.write('error: required config item "%s" not found in section "%s" of "%s"!\n'%(item['name'], item['section'], self.ini)) + sys.exit(1) + else: + sys.stderr.write('warning: assigned default value of "%s" to "%s.%s"\n'%(str(item['default']), item['section'], item['name'])) + setattr(obj, item['name'], inner) + + +# TEST CODE +def _main(): + config = ComboParser('config.ini') + config.add_item('watchd', 'debug', bool, False, 'turn additional debug info on', False) + config.add_item('watchd', 'float', float, 0.1, 'a float test', True) + config.add_item('watchd', 'strupp', str, "sup", 'a str test', False) + config.add_item('common', 'tor_host', str, '127.0.0.1:9050', 'address of tor proxy', True) + config.load() + print config.watchd.debug + print config.watchd.float + print config.watchd.strupp + print config.common.tor_host + +if __name__ == '__main__': + _main() diff --git a/config.ini.sample b/config.ini.sample index f3a4bff..0182c75 100644 --- a/config.ini.sample +++ b/config.ini.sample @@ -1,9 +1,8 @@ -[global] -tor_host = 127.0.0.1:9050 +[common] +tor_hosts = 127.0.0.1:9050 database = proxylist.sqlite -[watcherd] -proxy_file = false +[watchd] max_fail = 5 threads = 10 timeout = 15 @@ -11,10 +10,9 @@ submit_after = 200 use_ssl = false debug = false -[proxyfind] +[ppf] search = true timeout = 30 -threads = 3 checktime = 3600 perfail_checktime = 3600 diff --git a/config.py b/config.py index 32d7102..4cf79a9 100644 --- a/config.py +++ b/config.py @@ -1,48 +1,24 @@ -from ConfigParser import SafeConfigParser +from comboparse import ComboParser -_loaded = False +class Config(ComboParser): + def load(self): + super(Config, self).load() + self.torhosts = [ str(i).strip() for i in self.common.tor_hosts.split(',') ] + with 
open('servers.txt', 'r') as handle: + self.servers = [x.strip() for x in handle.readlines() if len(x.strip()) > 0] + def __init__(self): + super(Config, self).__init__('config.ini') + self.add_item('common', 'tor_hosts', str, '127.0.0.1:9050', 'comma-separated list of tor proxy address(es)', True) + self.add_item('common', 'database', str, 'proxylist.sqlite', 'filename of database', True) -class phantom(): - def __init__(self): pass + self.add_item('watchd', 'max_fail', int, 5, 'number of fails after which a proxy is considered dead', False) + self.add_item('watchd', 'threads', int, 10, 'number of threads watchd uses to check proxies', True) + self.add_item('watchd', 'timeout', int, 15, 'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False) + self.add_item('watchd', 'submit_after', int, 200, 'min. number of tested proxies for DB write', False) + self.add_item('watchd', 'debug', bool, False, 'whether to print additional debug info', False) + self.add_item('watchd', 'use_ssl', bool, False, 'whether to use SSL and port 6697 to connect to targets (slower)', False) -def load(): - if _loaded: return - global database, maxfail, search, torhosts, watchd_threads, checktime, timeout, read_timeout, submit_after, use_ssl, url_checktime, url_perfail_checktime - - ## read the config files - parser = SafeConfigParser() - parser.read('config.ini') - - database = parser.get('global', 'database') - #maxfail = parser.getint('global', 'proxy_max_fail') - torhosts = [ str(i).strip() for i in parser.get('global', 'tor_host').split(',') ] - - global _watchd - _watchd = phantom() - _watchd.threads = parser.getint('watcherd', 'threads') - _watchd.timeout = parser.getint('watcherd', 'timeout') - _watchd.submit_after = parser.getint('watcherd', 'submit_after') - _watchd.use_ssl = parser.getboolean('watcherd', 'use_ssl') - _watchd.debug = parser.getboolean('watcherd', 'debug') - _watchd.maxfail = parser.getint('watcherd', 'max_fail') - - global _leechd - 
_leechd = phantom() - _leechd.checktime = parser.get('proxyfind', 'checktime') - _leechd.perfail_checktime = parser.get('proxyfind', 'perfail_checktime') - _leechd.search = parser.getboolean('proxyfind', 'search') - - global watchd_debug - watchd_debug = parser.getboolean('watcherd', 'debug') - - # allow overriding select items from the commandline - import argparse - aparse = argparse.ArgumentParser() - aparse.add_argument('--watchd_threads', help="how many proxy checker threads to spin up, 0==none, default: 10", type=int, default=_watchd.threads, required=False) - args = aparse.parse_args() - - _watchd.threads = args.watchd_threads - - global servers - with open('servers.txt', 'r') as handle: - servers = [x.strip() for x in handle.readlines() if len(x.strip()) > 0] + self.add_item('ppf', 'search', bool, True, 'whether to use searx search engine to find new proxy lists', False) + self.add_item('ppf', 'timeout', float, 15, 'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False) + self.add_item('ppf', 'checktime', int, 3600, 'base checking interval for urls in db in seconds', False) + self.add_item('ppf', 'perfail_checktime', int, 3600, 'additional checking interval for urls in db in seconds per experienced failure', False) diff --git a/ppf.py b/ppf.py index 93c03c8..6bf8986 100755 --- a/ppf.py +++ b/ppf.py @@ -8,10 +8,12 @@ import mysqlite import proxywatchd from misc import _log from soup_parser import soupify -import config +from config import Config from http2 import RsHttp, _parse_url import rocksock +config = Config() + base_header = { 'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', } @@ -71,7 +73,7 @@ def insert_proxies(proxies, uri, sqlite, timestamp): _log('+%d item(s) from %s' % (len(new), uri), 'added') def proxyfind(sqlite = None): - if not sqlite: sqlite = mysqlite.mysqlite(config.database,str) + if not sqlite: sqlite = mysqlite.mysqlite(config.common.database,str) choice = 
random.choice(searx_instances) urls = [] @@ -157,14 +159,14 @@ if __name__ == '__main__': config.load() proxies={'http':'socks4://%s' % random.choice(config.torhosts),'https':'socks4://%s' % random.choice(config.torhosts)} - sqlite = mysqlite.mysqlite(config.database, str) + sqlite = mysqlite.mysqlite(config.common.database, str) ## create dbs if required sqlite.execute('CREATE TABLE IF NOT EXISTS uris (added INT, url TEXT, check_time INT, error INT, driver INT, hash TEXT)') sqlite.execute('CREATE TABLE IF NOT EXISTS proxylist (proxy BLOB, country BLOB, added INT, failed INT, tested INT, dronebl INT, proto TEXT, success_count INT, total_duration INT)') sqlite.commit() import_from_file('import.txt', sqlite) - if config._leechd.search: + if config.ppf.search: ## load search terms with open('search_terms.txt', 'r') as f: search_terms = [ i.strip() for i in f.read().split('\n') if len(i.strip()) ] @@ -175,7 +177,7 @@ if __name__ == '__main__': empty = [ urignore.append(i.split('/')[2]) for i in searx_instances ] # start proxy watcher - if config._watchd.threads > 0: + if config.watchd.threads > 0: watcherd = proxywatchd.Proxywatchd() watcherd.start() else: @@ -185,11 +187,11 @@ if __name__ == '__main__': while True: try: ## any site that needs to be checked ? - rows = [ [i[0],i[1],i[2]] for i in sqlite.execute('SELECT url,hash,error FROM uris WHERE (check_time+?+(error*?) 
Date: Tue, 8 Jan 2019 03:25:52 +0000 Subject: [PATCH 03/13] ppf: honor timeout --- ppf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppf.py b/ppf.py index 6bf8986..3287118 100755 --- a/ppf.py +++ b/ppf.py @@ -44,7 +44,7 @@ def fetch_contents(url): 'Cache-Control: max-age=0', ] proxies = [rocksock.RocksockProxyFromURL('socks4://%s' % random.choice( config.torhosts ))] - http = RsHttp(host,ssl=ssl,port=port, keep_alive=True, timeout=15, max_tries=1, follow_redirects=True, auto_set_cookies=True, proxies=proxies, user_agent='Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0') + http = RsHttp(host,ssl=ssl,port=port, keep_alive=True, timeout=config.ppf.timeout, max_tries=1, follow_redirects=True, auto_set_cookies=True, proxies=proxies, user_agent='Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0') if not http.connect(): _log("failed to connect to %s"%url, "ppf") return '' From 38d89f5bd9d04f92d7f4b4fb3f7a9a8dcfd3fe3f Mon Sep 17 00:00:00 2001 From: rofl0r Date: Tue, 8 Jan 2019 03:30:31 +0000 Subject: [PATCH 04/13] ppf: add option for number of http retries --- config.ini.sample | 1 + config.py | 1 + ppf.py | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/config.ini.sample b/config.ini.sample index 0182c75..459837c 100644 --- a/config.ini.sample +++ b/config.ini.sample @@ -13,6 +13,7 @@ debug = false [ppf] search = true timeout = 30 +http_retries = 1 checktime = 3600 perfail_checktime = 3600 diff --git a/config.py b/config.py index 4cf79a9..c632d70 100644 --- a/config.py +++ b/config.py @@ -20,5 +20,6 @@ class Config(ComboParser): self.add_item('ppf', 'search', bool, True, 'whether to use searx search engine to find new proxy lists', False) self.add_item('ppf', 'timeout', float, 15, 'timeout for blocking operations (connect/recv/...) 
for proxy checks in seconds', False) + self.add_item('ppf', 'http_retries', int, 1, 'number of retries for http connects', False) self.add_item('ppf', 'checktime', int, 3600, 'base checking interval for urls in db in seconds', False) self.add_item('ppf', 'perfail_checktime', int, 3600, 'additional checking interval for urls in db in seconds per experienced failure', False) diff --git a/ppf.py b/ppf.py index 3287118..9c43b71 100755 --- a/ppf.py +++ b/ppf.py @@ -44,7 +44,7 @@ def fetch_contents(url): 'Cache-Control: max-age=0', ] proxies = [rocksock.RocksockProxyFromURL('socks4://%s' % random.choice( config.torhosts ))] - http = RsHttp(host,ssl=ssl,port=port, keep_alive=True, timeout=config.ppf.timeout, max_tries=1, follow_redirects=True, auto_set_cookies=True, proxies=proxies, user_agent='Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0') + http = RsHttp(host,ssl=ssl,port=port, keep_alive=True, timeout=config.ppf.timeout, max_tries=config.ppf.http_retries, follow_redirects=True, auto_set_cookies=True, proxies=proxies, user_agent='Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0') if not http.connect(): _log("failed to connect to %s"%url, "ppf") return '' From fb917184b0f32270fdca3c93381da1bea29357d7 Mon Sep 17 00:00:00 2001 From: rofl0r Date: Tue, 8 Jan 2019 03:34:41 +0000 Subject: [PATCH 05/13] watchd: remove catch-it-all except statements it took considerable time to figure out why it suddenly stopped working due to a typo inside the try/except block. 
--- proxywatchd.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/proxywatchd.py b/proxywatchd.py index 280cb76..2756f12 100644 --- a/proxywatchd.py +++ b/proxywatchd.py @@ -66,7 +66,6 @@ class WorkerJob(): except KeyboardInterrupt as e: raise(e) - except: sock.disconnect() return None, None, None, None, None, fail_inc @@ -100,7 +99,7 @@ class WorkerJob(): _log('%s://%s;%s d: %.2f sec(s);%s srv: %s; recv: %s' % (proto, self.proxy, cstats, duration, torstats, srv, recvstats), 'xxxxx') except KeyboardInterrupt as e: raise e - except: + except rocksock.RocksockException as e: self.failcount += 1 finally: sock.disconnect() From 35421b980e8779f01ce03d26623db647298f89b4 Mon Sep 17 00:00:00 2001 From: rofl0r Date: Tue, 8 Jan 2019 03:35:40 +0000 Subject: [PATCH 06/13] rocksock: bump to latest --- rocksock.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rocksock.py b/rocksock.py index 15732bb..8d328a3 100644 --- a/rocksock.py +++ b/rocksock.py @@ -252,6 +252,8 @@ class Rocksock(): chunk = self.sock.recv(n) except socket.timeout: raise RocksockException(RS_E_HIT_TIMEOUT, failedproxy=self._failed_proxy(pnum)) + except socket.error as e: + raise RocksockException(e.errno, errortype=RS_ET_SYS, failedproxy=self._failed_proxy(pnum)) except ssl.SSLError as e: s = self._get_ssl_exception_reason(e) if s == 'The read operation timed out': From 9ccf8b78547ccd10e5894be60f2d060a3d9b6cae Mon Sep 17 00:00:00 2001 From: rofl0r Date: Tue, 8 Jan 2019 04:18:25 +0000 Subject: [PATCH 07/13] ppf: write dates as int --- ppf.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ppf.py b/ppf.py index 9c43b71..c8d8683 100755 --- a/ppf.py +++ b/ppf.py @@ -34,7 +34,7 @@ def import_from_file(fn, sqlite): exists = [ i[0] for i in sqlite.execute('SELECT url FROM uris WHERE url=?',(u,)).fetchall() ] if exists: continue print('adding "%s"' % u) - sqlite.execute('INSERT INTO uris (added,url,check_time,error) VALUES (?,?,?,?)', (time.time(),u,0,1)) + 
sqlite.execute('INSERT INTO uris (added,url,check_time,error) VALUES (?,?,?,?)', (int(time.time()),u,0,1)) sqlite.commit() def fetch_contents(url): @@ -95,7 +95,7 @@ def proxyfind(sqlite = None): if len(urls): query = [ 'url=?' for u in urls ] known = [ i[0] for i in sqlite.execute('SELECT url FROM uris WHERE %s' % ' OR '.join(query),urls).fetchall() ] - time_now = time.time() + time_now = int(time.time()) new = [ (time_now,i,0,5,0) for i in urls if not i in known ] if len(new): sqlite.executemany('INSERT INTO uris (added,url,check_time,error,driver) values(?,?,?,?,?)', new) @@ -139,13 +139,13 @@ def proxyleech(sqlite, rows): else: row[2] = 0 #check_time = (time.time() + 3600 + (3600 * row[2])) - sqlite.execute('UPDATE uris SET error=?,hash=?,check_time=? where url=?', (row[2],hash, time.time(),row[0])) + sqlite.execute('UPDATE uris SET error=?,hash=?,check_time=? where url=?', (row[2],hash, int(time.time()),row[0])) sqlite.commit() if not row[1] or row[2] > 0: return add = [] - time_now = time.time() + time_now = int(time.time()) for i in uniques: add.append(i) if len(add) > 500: @@ -187,7 +187,7 @@ if __name__ == '__main__': while True: try: ## any site that needs to be checked ? - rows = [ [i[0],i[1],i[2]] for i in sqlite.execute('SELECT url,hash,error FROM uris WHERE (check_time+?+(error*?) 
Date: Tue, 8 Jan 2019 04:30:50 +0000 Subject: [PATCH 08/13] ppf: check for valid ports --- ppf.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/ppf.py b/ppf.py index c8d8683..13b2d5f 100755 --- a/ppf.py +++ b/ppf.py @@ -55,6 +55,11 @@ def fetch_contents(url): return res +def valid_port(proxy): + ip, port = proxy.split(':') + port = int(port) + return port > 0 and port < 65535 + _known_proxies = {} def insert_proxies(proxies, uri, sqlite, timestamp): global _known_proxies @@ -63,9 +68,12 @@ def insert_proxies(proxies, uri, sqlite, timestamp): for k in known: _known_proxies[k[0]] = True - new = [ (timestamp,i,3,0,0,0) for i in proxies if not i in _known_proxies ] - for i in new: - _known_proxies[i[1]] = True + new = [] + for p in proxies: + if not p in _known_proxies: + if not valid_port(p): continue + new.append((timestamp,p,3,0,0,0)) + _known_proxies[p] = True if len(new): sqlite.executemany('INSERT INTO proxylist (added,proxy,failed,tested,success_count,total_duration) VALUES (?,?,?,?,?,?)', new) From b9058b03344a624d5c3b977763f3902745d17618 Mon Sep 17 00:00:00 2001 From: rofl0r Date: Tue, 8 Jan 2019 04:38:14 +0000 Subject: [PATCH 09/13] http2: bump to latest --- http2.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/http2.py b/http2.py index 5db7b94..1018482 100644 --- a/http2.py +++ b/http2.py @@ -75,7 +75,7 @@ def _is_textual_content_type(ct): return ct in TEXTUAL_CONTENT_TYPES_LIST class RsHttp(): - def __init__(self, host, port=80, ssl=False, follow_redirects=False, auto_set_cookies=False, keep_alive=False, timeout=60, user_agent=None, proxies=None, max_tries=10, **kwargs): + def __init__(self, host, port=80, ssl=False, follow_redirects=False, auto_set_cookies=False, keep_alive=False, timeout=60, user_agent=None, proxies=None, max_tries=10, log_errors=True, **kwargs): self.host = host self.port = port self.use_ssl = ssl @@ -88,10 +88,12 @@ class RsHttp(): self.proxies = proxies self.cookies = 
dict() self.max_tries = max_tries + self.log_errors = log_errors self.headers = [] def _err_log(self, s): - sys.stderr.write(s + '\n') + if self.log_errors: + sys.stderr.write(s + '\n') def connect(self): return self.reconnect() @@ -310,6 +312,7 @@ class RsHttp(): self.use_ssl = use_ssl self.conn.disconnect() self.conn = None + self.reconnect() return self.get(url, extras) return hdr, res From b88a8426dc5426e1b4f6e74a2594b2bb06881881 Mon Sep 17 00:00:00 2001 From: rofl0r Date: Tue, 8 Jan 2019 04:38:28 +0000 Subject: [PATCH 10/13] rocksock: bump to latest --- rocksock.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rocksock.py b/rocksock.py index 8d328a3..c92f6e7 100644 --- a/rocksock.py +++ b/rocksock.py @@ -117,6 +117,8 @@ class RocksockException(Exception): class RocksockHostinfo(): def __init__(self, host, port): + if port < 0 or port > 65535: + raise RocksockException(RS_E_INVALID_PROXY_URL, failedproxy=-1) self.host = host self.port = port From b6839eea79a86d56b82dd060db8776f5757f9cf5 Mon Sep 17 00:00:00 2001 From: rofl0r Date: Tue, 8 Jan 2019 15:42:03 +0000 Subject: [PATCH 11/13] rocksock/http2: bump to latest --- http2.py | 11 ++++++++++- rocksock.py | 19 ++++++++++++++----- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/http2.py b/http2.py index 1018482..5520b37 100644 --- a/http2.py +++ b/http2.py @@ -75,7 +75,10 @@ def _is_textual_content_type(ct): return ct in TEXTUAL_CONTENT_TYPES_LIST class RsHttp(): - def __init__(self, host, port=80, ssl=False, follow_redirects=False, auto_set_cookies=False, keep_alive=False, timeout=60, user_agent=None, proxies=None, max_tries=10, log_errors=True, **kwargs): + def __init__(self, host, port=80, ssl=False, follow_redirects=False, \ + auto_set_cookies=False, keep_alive=False, timeout=60, \ + user_agent=None, proxies=None, max_tries=10, log_errors=True, \ + **kwargs): self.host = host self.port = port self.use_ssl = ssl @@ -89,8 +92,12 @@ class RsHttp(): self.cookies = dict() self.max_tries = 
max_tries self.log_errors = log_errors + self.last_rs_exception = None self.headers = [] + def get_last_rocksock_exception(self): + return self.last_rs_exception + def _err_log(self, s): if self.log_errors: sys.stderr.write(s + '\n') @@ -244,6 +251,7 @@ class RsHttp(): self.conn.connect() return True except RocksockException as e: + self.last_rs_exception = e if e.errortype == rocksock.RS_ET_GAI and e.error==-2: # -2: Name does not resolve self.conn.disconnect() @@ -279,6 +287,7 @@ class RsHttp(): try: return func(*args) except RocksockException as e: + self.last_rs_exception = e self.conn.disconnect() if not self.reconnect(): return failret except IOError: diff --git a/rocksock.py b/rocksock.py index c92f6e7..6dec6d0 100644 --- a/rocksock.py +++ b/rocksock.py @@ -1,4 +1,4 @@ -import socket, ssl, select, copy +import socket, ssl, select, copy, errno # rs_proxyType RS_PT_NONE = 0 @@ -31,9 +31,13 @@ RS_E_HIT_READTIMEOUT = 14 RS_E_HIT_WRITETIMEOUT = 15 RS_E_HIT_CONNECTTIMEOUT = 16 RS_E_PROXY_GENERAL_FAILURE = 17 +RS_E_TARGET_NET_UNREACHABLE = 18 RS_E_TARGETPROXY_NET_UNREACHABLE = 18 +RS_E_TARGET_HOST_UNREACHABLE = 19 RS_E_TARGETPROXY_HOST_UNREACHABLE = 19 +RS_E_TARGET_CONN_REFUSED = 20 RS_E_TARGETPROXY_CONN_REFUSED = 20 +RS_E_TARGET_TTL_EXPIRED = 21 RS_E_TARGETPROXY_TTL_EXPIRED = 21 RS_E_PROXY_COMMAND_NOT_SUPPORTED = 22 RS_E_PROXY_ADDRESSTYPE_NOT_SUPPORTED = 23 @@ -99,7 +103,6 @@ class RocksockException(Exception): RS_E_INVALID_PROXY_URL : "invalid proxy URL string" } if self.errortype == RS_ET_SYS: - import errno if self.error in errno.errorcode: msg = "ERRNO: " + errno.errorcode[self.error] else: @@ -168,6 +171,12 @@ class Rocksock(): self.sock = None self.timeout = timeout + def _translate_socket_error(self, e, pnum): + fp = self._failed_proxy(pnum) + if e.errno == errno.ECONNREFUSED: + return RocksockException(RS_E_TARGET_CONN_REFUSED, failedproxy=fp) + return RocksockException(e.errno, errortype=RS_ET_SYS, failedproxy=fp) + def _failed_proxy(self, pnum): if pnum 
< 0: return -1 if pnum >= len(self.proxychain)-1: return -1 @@ -191,7 +200,7 @@ class Rocksock(): except socket.timeout: raise RocksockException(RS_E_HIT_TIMEOUT, failedproxy=self._failed_proxy(0)) except socket.error as e: - raise RocksockException(e.errno, errortype=RS_ET_SYS, failedproxy=self._failed_proxy(0)) + raise self._translate_socket_error(e, 0) for pnum in xrange(1, len(self.proxychain)): curr = self.proxychain[pnum] @@ -206,7 +215,7 @@ class Rocksock(): #if hasattr(e, 'library'): subsystem = e.library raise RocksockException(RS_E_SSL_GENERIC, failedproxy=reason, errortype=RS_ET_SSL) except socket.error as e: - raise RocksockException(e.errno, errortype=RS_ET_SYS) + raise self._translate_socket_error(e, -1) except Exception as e: raise e """ @@ -255,7 +264,7 @@ class Rocksock(): except socket.timeout: raise RocksockException(RS_E_HIT_TIMEOUT, failedproxy=self._failed_proxy(pnum)) except socket.error as e: - raise RocksockException(e.errno, errortype=RS_ET_SYS, failedproxy=self._failed_proxy(pnum)) + raise self._translate_socket_error(e, pnum) except ssl.SSLError as e: s = self._get_ssl_exception_reason(e) if s == 'The read operation timed out': From 6e4c45175ef7afc61185ea3a305fc334df4e3040 Mon Sep 17 00:00:00 2001 From: rofl0r Date: Tue, 8 Jan 2019 15:43:03 +0000 Subject: [PATCH 12/13] ppf: add safeguards against tor outage --- ppf.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/ppf.py b/ppf.py index 13b2d5f..8597bfe 100755 --- a/ppf.py +++ b/ppf.py @@ -43,11 +43,25 @@ def fetch_contents(url): 'Accept-Language: en-US,en;q=0.8', 'Cache-Control: max-age=0', ] - proxies = [rocksock.RocksockProxyFromURL('socks4://%s' % random.choice( config.torhosts ))] - http = RsHttp(host,ssl=ssl,port=port, keep_alive=True, timeout=config.ppf.timeout, max_tries=config.ppf.http_retries, follow_redirects=True, auto_set_cookies=True, proxies=proxies, user_agent='Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0') 
- if not http.connect(): - _log("failed to connect to %s"%url, "ppf") - return '' + while True: + proxies = [rocksock.RocksockProxyFromURL('socks4://%s' % random.choice( config.torhosts ))] + http = RsHttp(host,ssl=ssl,port=port, keep_alive=True, timeout=config.ppf.timeout, max_tries=config.ppf.http_retries, follow_redirects=True, auto_set_cookies=True, proxies=proxies, user_agent='Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0') + if not http.connect(): + _log("failed to connect to %s"%url, "ppf") + e = http.get_last_rocksock_exception() + if not e: + return '' + et = e.get_errortype() + ee = e.get_error() + ef = e.get_failedproxy() + if et == rocksock.RS_ET_OWN and \ + ee == rocksock.RS_E_TARGET_CONN_REFUSED \ + and ef == 0: + _log("could not connect to proxy 0 - check your connection", "error") + time.sleep(5) + continue + return '' + break hdr, res = http.get(uri, headers) res = res.encode('utf-8') if isinstance(res, unicode) else res for retry_message in retry_messages: From bd1e85a8838cc356290bac6fa7ae834952f06547 Mon Sep 17 00:00:00 2001 From: rofl0r Date: Tue, 8 Jan 2019 16:06:01 +0000 Subject: [PATCH 13/13] proxywatchd: add safeguards against tor outage --- proxywatchd.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/proxywatchd.py b/proxywatchd.py index 2756f12..7ee3b91 100644 --- a/proxywatchd.py +++ b/proxywatchd.py @@ -59,6 +59,12 @@ class WorkerJob(): err == rocksock.RS_E_HIT_TIMEOUT: # proxy is not online, so don't waste time trying all possible protocols break + elif fp == 0 and \ + err == rocksock.RS_E_TARGET_CONN_REFUSED: + fail_inc = 0 + if random.randint(0, (config.watchd.threads-1)/2) == 0: + _log("could not connect to proxy 0, sleep 5s", "ERROR") + time.sleep(5) elif et == rocksock.RS_ET_GAI: fail_inc = 0 _log("could not resolve connection target %s"%srv, "ERROR")