From d7db366857145281711c31f58b2b37cb4aa015be Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 22 Aug 2021 20:39:50 +0200 Subject: [PATCH] split to ip/port, "cleanse" ips and ports, bugfixes --- dbs.py | 8 ++++++-- fetch.py | 6 +++++- proxywatchd.py | 39 +++++++++++++++++++++++++-------------- 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/dbs.py b/dbs.py index 0308155..2f0b17d 100644 --- a/dbs.py +++ b/dbs.py @@ -13,6 +13,9 @@ def create_table_if_not_exists(sqlite, dbname): proto TEXT, mitm INT, success_count INT, + ip TEXT, + port INT, + consecutive_success INT, total_duration INT)""") elif dbname == 'uris': @@ -34,9 +37,10 @@ def insert_proxies(proxydb, proxies, url): new = [] for p in proxies: - new.append((timestamp,p,3,0,0,0,0,0)) + ip, port = p.split(':') + new.append((timestamp,p,ip,port,3,0,0,0,0,0)) - proxydb.executemany('INSERT OR IGNORE INTO proxylist (added,proxy,failed,tested,success_count,total_duration,mitm,consecutive_success) VALUES (?,?,?,?,?,?,?,?)', new) + proxydb.executemany('INSERT OR IGNORE INTO proxylist (added,proxy,ip,port,failed,tested,success_count,total_duration,mitm,consecutive_success) VALUES (?,?,?,?,?,?,?,?,?,?)', new) proxydb.commit() _log('+%d proxy/ies from %s' % (len(proxies), url), 'added') diff --git a/fetch.py b/fetch.py index ab71cbf..8ac8b67 100644 --- a/fetch.py +++ b/fetch.py @@ -23,7 +23,7 @@ def cleanhtml(raw_html): def fetch_contents(url, head=False, proxy=None): content = None - if len(proxy): + if proxy is not None and len(proxy): for p in proxy: content = _fetch_contents(url, head=head, proxy=p) if content is not None: break @@ -102,6 +102,10 @@ def extract_proxies(content, proxydb): uniques_dict = {} for p in matches: + ip, port = p.split(':') + ip = '.'.join( [ str(int(str(i))) for i in ip.split('.') ] ) + port = int(port) + p = '%s:%s' % (ip, port) uniques_dict[p] = True uniques = [] diff --git a/proxywatchd.py b/proxywatchd.py index 1d2d254..dbd4ad0 100644 --- a/proxywatchd.py 
+++ b/proxywatchd.py @@ -3,7 +3,9 @@ import threading import time, random, string, re, copy try: - from geoip import geolite2 + import IP2Location + import os + geodb = IP2Location.IP2Location(os.path.join("data", "IP2LOCATION-LITE-DB1.BIN")) geolite = True except: geolite = False @@ -87,8 +89,10 @@ def socks4_resolve(srvname, server_port): class WorkerJob(): - def __init__(self, proxy, proto, failcount, success_count, total_duration, country, mitm, consecutive_success, oldies = False): - self.proxy = proxy + def __init__(self, ip, port, proto, failcount, success_count, total_duration, country, mitm, consecutive_success, oldies = False): + self.ip = ip + self.port = int(port) + self.proxy = '%s:%s' % (ip, port) self.proto = proto self.failcount = failcount self.checktime = None @@ -130,7 +134,7 @@ class WorkerJob(): duration = time.time() proxies = [ rocksock.RocksockProxyFromURL('socks5://%s' % torhost), - rocksock.RocksockProxyFromURL('%s://%s' % (proto, self.proxy)), + rocksock.RocksockProxyFromURL('%s://%s:%s' % (proto, self.ip, self.port)), ] try: @@ -143,7 +147,7 @@ class WorkerJob(): return sock, proto, duration, torhost, srvname, 0, use_ssl except rocksock.RocksockException as e: if config.watchd.debug: - _log("proxy failed: %s://%s: %s"%(proto, self.proxy, e.get_errormessage()), 'debug') + _log("proxy failed: %s://%s:%d: %s"%(proto, self.ip, self.port, e.get_errormessage()), 'debug') et = e.get_errortype() err = e.get_error() @@ -178,6 +182,14 @@ class WorkerJob(): return None, None, None, None, None, fail_inc, use_ssl + def rwip(self, ip): + n = [] + for b in ip.split('.'): + while len(b) > 1 and b[0] == '0': b = b[1:] + n.append(b) + return '.'.join(n) + + def run(self): self.checktime = int(time.time()) checktype = config.watchd.checktype @@ -188,17 +200,16 @@ class WorkerJob(): return try: recv = sock.recv(-1) - #print(recv) regex = '^(:|NOTICE|ERROR)' if checktype == 'irc' else regexes[srv] # good data if re.search(regex, recv, re.IGNORECASE): duration 
= (time.time() - duration) - if geolite and not self.country or self.country == 'unknown' or self.country == 'N/A': - match = geolite2.lookup(self.proxy.split(':')[0]) - if match is not None: self.country = match.country - else: self.country = 'N/A' + if geolite and self.country is None: + self.ip = self.rwip(self.ip) + rec = geodb.get_all(self.ip) + if rec is not None and rec.country_short: self.country = rec.country_short self.proto = proto self.failcount = 0 @@ -208,7 +219,7 @@ class WorkerJob(): self.total_duration += int(duration*1000) torstats = "" if len(config.torhosts)==1 else ' tor: %s;'%tor recvstats = "".join([x if x in string.printable and ord(x) > 32 else '.' for x in recv]) - _log('%s://%s (%s) d: %.2f sec(s);%s srv: %s; ssl: %s; recv: %s' % (proto, self.proxy, self.country, duration, torstats, srv, str(is_ssl), recvstats[:50]), 'xxxxx') + _log('%s://%s:%d (%s) d: %.2f sec(s);%s srv: %s; ssl: %s; recv: %s' % (proto, self.ip, self.port, self.country, duration, torstats, srv, str(is_ssl), recvstats[:50]), 'xxxxx') else: self.failcount += 1 self.consecutive_success = 0 @@ -323,7 +334,7 @@ class Proxywatchd(): # create table if needed self._prep_db() - self.mysqlite.execute('CREATE TABLE IF NOT EXISTS proxylist (proxy BLOB, country BLOB, added INT, failed INT, tested INT, source BLOB, dronebl INT, proto TEXT, success_count INT, total_duration INT)') + self.mysqlite.execute('CREATE TABLE IF NOT EXISTS proxylist (proxy BLOB, country BLOB, added INT, failed INT, tested INT, source BLOB, dronebl INT, proto TEXT, success_count INT, total_duration INT, ip TEXT, port INT)') self.mysqlite.commit() self._close_db() @@ -337,7 +348,7 @@ class Proxywatchd(): def fetch_rows(self): self.isoldies = False - q = 'SELECT proxy,proto,failed,success_count,total_duration,country,mitm,consecutive_success FROM proxylist WHERE failed >= ? and failed < ? and (tested + ? + (failed * ?)) < ? 
ORDER BY RANDOM()' + q = 'SELECT ip,port,proto,failed,success_count,total_duration,country,mitm,consecutive_success FROM proxylist WHERE failed >= ? and failed < ? and (tested + ? + (failed * ?)) < ? ORDER BY RANDOM()' rows = self.mysqlite.execute(q, (0, config.watchd.max_fail, config.watchd.checktime, config.watchd.perfail_checktime, time.time())).fetchall() # check oldies ? if len(rows) < config.watchd.threads: @@ -356,7 +367,7 @@ class Proxywatchd(): rows = self.fetch_rows() #print('preparing jobbs, oldies: %s' % str(self.isoldies)) for row in rows: - job = WorkerJob(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], self.isoldies) + job = WorkerJob(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8], self.isoldies) self.jobs.append(job) self._close_db()