# Files
# ppf/proxywatchd.py
# Mickaël Serneels f179080cca use geoloc
# now saves proxy's country in db
# 2019-05-17 22:59:32 +02:00
#
# 420 lines
# 12 KiB
# Python

#!/usr/bin/env python
import threading
import time, random, string, re, copy
from geoip import geolite2
from config import Config
import mysqlite
from misc import _log
import rocksock
# Shared configuration object; its values are populated by config.load().
config = Config()
# Flipped to True by the __main__ guard when this file is run as a script.
_run_standalone = False
# hostname -> resolved address cache, filled and read by socks4_resolve().
cached_dns = dict()
def try_div(a, b):
    """Return a/b as a float, or 0 when b is zero.

    Used for rate/average statistics where an empty denominator should
    simply yield 0 instead of raising ZeroDivisionError.
    """
    if b != 0:
        return a / float(b)
    return 0
def socks4_resolve(srvname, server_port, proxy=None):
    """Resolve srvname to an address usable with plain socks4.

    Results are memoised in the module-level cached_dns dict so each
    hostname is only resolved once.

    srvname     -- hostname of the connection target
    server_port -- port passed along to the resolver
    proxy       -- optional proxy string, only used in the debug log line

    Returns the resolved address, or False when resolution failed.
    Raises rocksock.RocksockException for any resolver error that is not
    a name-resolution (RS_ET_GAI) failure.
    """
    if srvname in cached_dns:
        srv = cached_dns[srvname]
        if config.watchd.debug:
            msg = "using cached ip (%s) for %s" % (srv, srvname)
            # the proxy is only known when the caller passes it in
            if proxy is not None:
                msg += " (proxy: %s)" % proxy
            _log(msg, "debug")
        return srv

    try:
        af, sa = rocksock.resolve(rocksock.RocksockHostinfo(srvname, server_port), want_v4=True)
    except rocksock.RocksockException as e:
        # only name-resolution failures are expected here; anything else
        # is propagated instead of being silently treated as a DNS miss
        if e.get_errortype() != rocksock.RS_ET_GAI:
            raise
        sa = None

    if sa is None:
        _log("could not resolve connection target %s"%srvname, "ERROR")
        return False

    cached_dns[srvname] = sa[0]
    return sa[0]
class WorkerJob():
    """A single proxy check.

    Holds the proxy's bookkeeping values (as read from the proxylist
    table) and updates them in place when run() is called.
    """

    def __init__(self, proxy, proto, failcount, success_count, total_duration, country):
        self.proxy = proxy
        self.proto = proto
        self.failcount = failcount
        # unix timestamp of the last run(); None until the job has run
        self.checktime = None
        self.success_count = success_count
        self.total_duration = total_duration
        self.country = country

    def connect_socket(self):
        """Try to reach a random test server through tor and this proxy.

        When self.proto is None, http, socks5 and socks4 are tried in turn;
        otherwise only the known protocol is tried.

        Returns (sock, proto, start_time, torhost, srvname, 0) on success,
        or (None, None, None, None, None, fail_inc) on failure, where
        fail_inc is the amount the caller should add to the fail counter
        (0 when the failure was not the proxy's fault).
        """
        srvname = random.choice(config.servers).strip()
        protos = ['http', 'socks5', 'socks4'] if self.proto is None else [self.proto]
        use_ssl = random.choice([0,1]) if config.watchd.use_ssl == 2 else config.watchd.use_ssl
        server_port = 6697 if use_ssl else 6667
        fail_inc = 1

        for proto in protos:
            torhost = random.choice(config.torhosts)
            # socks4 (without 4a) requires a raw ip address
            # rocksock automatically resolves if needed, but it's more
            # efficient to cache the result.
            if proto == 'socks4':
                srv = socks4_resolve(srvname, server_port)
            else:
                srv = srvname
            ## skip socks4 failed resolution
            if not srv:
                # failing to resolve the test server is not the proxy's
                # fault, so don't count it (same policy as RS_ET_GAI below)
                fail_inc = 0
                continue

            duration = time.time()
            proxies = [
                rocksock.RocksockProxyFromURL('socks4://%s' % torhost),
                rocksock.RocksockProxyFromURL('%s://%s' % (proto, self.proxy)),
            ]

            sock = None
            try:
                sock = rocksock.Rocksock(host=srv, port=server_port, ssl=use_ssl, proxies=proxies, timeout=config.watchd.timeout)
                sock.connect()
                sock.send('NICK\n')
                return sock, proto, duration, torhost, srvname, 0
            except rocksock.RocksockException as e:
                if config.watchd.debug:
                    _log("proxy failed: %s://%s: %s"%(proto, self.proxy, e.get_errormessage()), 'debug')

                et = e.get_errortype()
                err = e.get_error()
                fp = e.get_failedproxy()

                # sock stays None if the constructor itself raised
                if sock is not None:
                    sock.disconnect()

                if et == rocksock.RS_ET_OWN:
                    # NOTE(review): grouping made explicit; this is how the
                    # original mixed and/or expression evaluated, i.e. a
                    # timeout stops the probing regardless of which hop
                    # failed. Confirm that is intended.
                    if (fp == 1 and err == rocksock.RS_E_REMOTE_DISCONNECTED) or \
                            err == rocksock.RS_E_HIT_TIMEOUT:
                        # proxy is not online, so don't waste time trying all possible protocols
                        break
                    elif fp == 0 and err == rocksock.RS_E_TARGET_CONN_REFUSED:
                        # first hop (tor) refused: not the proxy's fault
                        fail_inc = 0
                        # NOTE(review): the random check appears to only
                        # throttle the log line; every thread backs off.
                        if random.randint(0, (config.watchd.threads-1)//2) == 0:
                            _log("could not connect to proxy 0, sleep 5s", "ERROR")
                        time.sleep(5)
                elif et == rocksock.RS_ET_GAI:
                    # resolution failure: don't penalise the proxy
                    fail_inc = 0
                    _log("could not resolve connection target %s"%srvname, "ERROR")
                    break

        return None, None, None, None, None, fail_inc

    def run(self):
        """Perform the check and update this job's statistics in place.

        Sets checktime, and on a good reply resets failcount, bumps
        success_count, adds the elapsed milliseconds to total_duration and
        fills in country if it was missing. Connection failures increase
        failcount.
        """
        self.checktime = int(time.time())

        sock, proto, duration, tor, srv, failinc = self.connect_socket()
        if not sock:
            self.failcount += failinc
            return

        try:
            recv = sock.recv(6)

            # good data
            # NOTE(review): a reply that does not match leaves all counters
            # untouched (neither success nor failure) - confirm intended.
            if re.match('^(:|NOTICE)', recv, re.IGNORECASE):
                duration = (time.time() - duration)

                if not self.country or self.country == 'unknown' or self.country == 'N/A':
                    match = geolite2.lookup(self.proxy.split(':')[0])
                    if match is not None:
                        self.country = match.country
                    else:
                        self.country = 'N/A'

                self.proto = proto
                self.failcount = 0
                self.success_count = self.success_count + 1
                self.total_duration += int(duration*1000)
                torstats = "" if len(config.torhosts)==1 else ' tor: %s;'%tor
                # replace whitespace/control/non-printable bytes for logging
                recvstats = "".join([x if x in string.printable and ord(x) > 32 else '.' for x in recv])
                _log('%s://%s (%s) d: %.2f sec(s);%s srv: %s; recv: %s' % (proto, self.proxy, self.country, duration, torstats, srv, recvstats), 'xxxxx')
        except rocksock.RocksockException as e:
            self.failcount += 1
        finally:
            # always release the socket, also when interrupted
            sock.disconnect()
class WorkerThread():
    """Runs WorkerJob objects from a private queue.

    Can either be driven by its own thread (start_thread) or by calling
    workloop() directly, in which case the loop returns as soon as the
    queue is empty.
    """

    def __init__ (self, id):
        self.id = id
        self.done = threading.Event()
        self.thread = None
        self.workqueue = []
        self.workdone = []
        # guards workqueue and workdone, which are shared with the owner
        self.lock = threading.Lock()

    def stop(self):
        """Ask the work loop to exit after the current job."""
        self.done.set()

    def term(self):
        """Wait for the worker thread (if any) to finish."""
        if self.thread: self.thread.join()

    def add_jobs(self, jobs):
        """Append jobs to this worker's queue."""
        with self.lock:
            self.workqueue.extend(jobs)

    def return_jobs(self):
        """Remove and return all jobs that have not been started yet."""
        with self.lock:
            jobs = self.workqueue
            self.workqueue = []
        return jobs

    def jobcount(self):
        """Return the number of queued (not yet started) jobs."""
        return len(self.workqueue)

    def collect(self):
        """Remove and return all finished jobs."""
        # swap under the lock so a job finished concurrently by workloop()
        # cannot be dropped between reading and resetting the list
        with self.lock:
            wd = self.workdone
            self.workdone = []
        return wd

    def start_thread(self):
        """Run workloop() in a new thread."""
        self.thread = threading.Thread(target=self.workloop)
        self.thread.start()

    def pop_if_possible(self):
        """Return the next queued job, or None if the queue is empty."""
        with self.lock:
            if len(self.workqueue):
                job = self.workqueue.pop()
            else:
                job = None
        return job

    def workloop(self):
        """Process queued jobs until stopped.

        In threaded mode the loop idles while the queue is empty and ends
        once stop() was called; a statistics line is logged on exit.
        Without a thread it returns as soon as the queue is drained.
        """
        success_count = 0
        job_count = 0
        duration_total = 0
        duration_success_total = 0

        while True:
            job = self.pop_if_possible()
            if job:
                nao = time.time()
                job.run()
                spent = time.time() - nao
                if job.failcount == 0:
                    duration_success_total += spent
                    success_count += 1
                job_count += 1
                duration_total += spent
                with self.lock:
                    self.workdone.append(job)
            elif not self.thread:
                # called inline (no own thread): nothing left to do
                break
            if self.done.is_set(): break
            time.sleep(0.01)

        if self.thread:
            succ_rate = try_div(success_count, job_count)*100
            avg_succ_t = try_div(duration_success_total, success_count)
            avg_fail_t = try_div(duration_total-duration_success_total, job_count-success_count)
            avg_t = try_div(duration_total, job_count)
            _log("terminated, %d/%d (%.2f%%), avg.time S/F/T %.2f, %.2f, %.2f" \
                 % (success_count, job_count, succ_rate, avg_succ_t, avg_fail_t, avg_t) \
                 , self.id)
class Proxywatchd():
    """Periodically re-checks the proxies stored in the proxylist table.

    Jobs are fetched from the database, distributed over WorkerThread
    instances, and the results are written back in batches.
    """

    def __init__(self):
        config.load()
        self.in_background = False
        self.threads = []
        self.stopping = threading.Event()
        self.stopped = threading.Event()
        self.mysqlite = None
        # refreshed from the config on every prepare_jobs() call
        self.tor_safeguard = config.watchd.tor_safeguard

        # create table if needed
        self._prep_db()
        self.mysqlite.execute('CREATE TABLE IF NOT EXISTS proxylist (proxy BLOB, country BLOB, added INT, failed INT, tested INT, source BLOB, dronebl INT, proto TEXT, success_count INT, total_duration INT)')
        self.mysqlite.commit()
        self._close_db()

        self.submit_after = config.watchd.submit_after # number of collected jobs before writing db
        self.jobs = []
        self.collected = []
        self.totals = {
            'submitted':0,
            'success':0,
        }

    def stop(self):
        """Signal the main loop to shut down."""
        _log('halting... (%d thread(s))' % len(self.threads), 'watchd')
        self.stopping.set()

    def _cleanup(self):
        """Stop and join all workers, flush their results, mark as stopped."""
        for wt in self.threads:
            wt.stop()
        for wt in self.threads:
            wt.term()
        self.collect_work()
        self.submit_collected()
        self.stopped.set()

    def finish(self):
        """Wait for shutdown to complete and log the overall totals."""
        if not self.in_background: self._cleanup()
        while not self.stopped.is_set(): time.sleep(0.1)
        success_rate = try_div(self.totals['success'], self.totals['submitted']) * 100
        _log("total results: %d/%d (%.2f%%)"%(self.totals['success'], self.totals['submitted'], success_rate), "watchd")

    def _prep_db(self):
        """Open the watchd database."""
        self.mysqlite = mysqlite.mysqlite(config.watchd.database, str)

    def _close_db(self):
        """Close the database handle if one is open."""
        if self.mysqlite:
            self.mysqlite.close()
            self.mysqlite = None

    def fetch_rows(self):
        """Return the proxy rows that are due for a check.

        If fewer rows than worker threads are due and config.watchd.oldies
        is enabled, the result is replaced by a limited selection of
        proxies from the max_fail..2*max_fail fail range, and the tor
        safeguard is switched off for this round.
        """
        q = 'SELECT proxy,proto,failed,success_count,total_duration,country FROM proxylist WHERE failed >= ? and failed < ? and (tested + ? + (failed * ?)) < ? ORDER BY RANDOM()'
        rows = self.mysqlite.execute(q, (0, config.watchd.max_fail, config.watchd.checktime, config.watchd.perfail_checktime, time.time())).fetchall()
        # check oldies ?
        if len(rows) < config.watchd.threads and config.watchd.oldies:
            ## disable tor safeguard for old proxies
            if self.tor_safeguard: self.tor_safeguard = False
            q += ' LIMIT ?'
            rows = self.mysqlite.execute(q, (config.watchd.max_fail, config.watchd.max_fail*2, config.watchd.checktime, config.watchd.oldies_checktime, time.time(), config.watchd.threads*config.watchd.oldies_multi)).fetchall()
        return rows

    def prepare_jobs(self):
        """Load due proxies from the database into self.jobs."""
        self._prep_db()
        ## enable tor safeguard by default
        self.tor_safeguard = config.watchd.tor_safeguard
        rows = self.fetch_rows()
        for row in rows:
            job = WorkerJob(row[0], row[1], row[2], row[3], row[4], row[5])
            self.jobs.append(job)
        self._close_db()

    def collect_work(self):
        """Move finished jobs from all workers into self.collected."""
        for wt in self.threads:
            self.collected.extend(wt.collect())

    def collect_unfinished(self):
        """Take back queued, not yet started jobs from all workers."""
        for wt in self.threads:
            jobs = wt.return_jobs()
            self.jobs.extend(jobs)
        if len(self.jobs):
            _log("collected %d unfinished jobs"%len(self.jobs), "watchd")

    @staticmethod
    def _job_row(job):
        """Build the parameter tuple for the UPDATE statement of one job."""
        return (job.failcount, job.checktime, 1, job.country, job.proto, job.success_count, job.total_duration, job.proxy)

    def submit_collected(self):
        """Write collected results to the database.

        Returns True when everything was written (or there was nothing to
        write). Returns False when the tor safeguard triggered: with at
        least 100 results and a success rate at or below
        config.watchd.outage_threshold, only the successful jobs are
        written; with no success at all nothing is written and the
        collected jobs are kept.
        """
        if len(self.collected) == 0: return True

        successes = [job for job in self.collected if job.failcount == 0]
        sc = len(successes)
        success_rate = (float(sc) / len(self.collected)) * 100

        ret = True
        to_write = self.collected
        if len(self.collected) >= 100 and success_rate <= config.watchd.outage_threshold and self.tor_safeguard:
            _log("WATCHD %.2f%% SUCCESS RATE - tor circuit blocked? won't submit fails"%success_rate, "ERROR")
            if sc == 0: return False
            to_write = successes
            ret = False

        args = [self._job_row(job) for job in to_write]

        # report the number of rows actually written, which differs from
        # len(self.collected) when the safeguard dropped the failures
        _log("updating %d DB entries (success rate: %.2f%%)"%(len(args), success_rate), 'watchd')
        self._prep_db()
        query = 'UPDATE proxylist SET failed=?,tested=?,dronebl=?,country=?,proto=?,success_count=?,total_duration=? WHERE proxy=?'
        self.mysqlite.executemany(query, args)
        self.mysqlite.commit()
        self._close_db()
        self.collected = []
        self.totals['submitted'] += len(args)
        self.totals['success'] += sc
        return ret

    def start(self):
        """Start checking: inline for a single standalone thread, else in background."""
        if config.watchd.threads == 1 and _run_standalone:
            return self._run()
        else:
            return self._run_background()

    def run(self):
        """Block the calling thread while the daemon runs in the background."""
        if self.in_background:
            while 1: time.sleep(1)

    def _run_background(self):
        """Run the main loop in its own thread."""
        self.in_background = True
        t = threading.Thread(target=self._run)
        t.start()

    def _submit_or_back_off(self):
        """Submit results; return the seconds to pause (0 for no pause).

        A pause is requested when the submit was refused or reduced by the
        tor safeguard; queued jobs are pulled back from the workers then.
        """
        if not self.submit_collected() and self.tor_safeguard:
            _log("zzZzZzzZ sleeping 1 minute(s) due to tor issues - consider decreasing thread number!", "watchd")
            self.collect_unfinished()
            return 1*60
        return 0

    def _run(self):
        """Main loop: create workers, hand out jobs, collect and submit results."""
        _log('starting...', 'watchd')

        for i in range(config.watchd.threads):
            threadid = ''.join( [ random.choice(string.ascii_letters) for x in range(5) ] )
            wt = WorkerThread(threadid)
            if self.in_background:
                wt.start_thread()
            self.threads.append(wt)
            time.sleep( (random.random()/100) )

        sleeptime = 0

        while True:

            if self.stopping.is_set():
                if self.in_background: self._cleanup()
                break

            # sleeptime > 0 means we are in a pause: tick it down one
            # second at a time so a stop request is noticed quickly
            if sleeptime == 0:
                sleeptime = 1
            else:
                time.sleep(1)
                sleeptime -= 1
                continue

            # sample one random worker; an empty queue triggers a refill
            if random.choice(self.threads).jobcount() == 0:
                self.collect_unfinished()
                if not len(self.jobs):
                    self.collect_work()
                    pause = self._submit_or_back_off()
                    if pause:
                        sleeptime = pause
                    else:
                        self.prepare_jobs()
                else:
                    if len(self.jobs) < len(self.threads):
                        # allow threads enough time to consume the jobs
                        sleeptime = 10

                if len(self.jobs):
                    _log("handing out %d jobs to %d thread(s)"% (len(self.jobs), len(self.threads)), 'watchd')
                    # jobs per thread, rounded up
                    jpt = -(-len(self.jobs) // len(self.threads))
                    for tid, wt in enumerate(self.threads):
                        wt.add_jobs(self.jobs[tid*jpt:tid*jpt+jpt])
                    self.jobs = []

            if not self.in_background: # single_thread scenario
                self.threads[0].workloop()

            self.collect_work()

            if len(self.collected) > self.submit_after:
                pause = self._submit_or_back_off()
                if pause:
                    sleeptime = pause

            time.sleep(1)
            sleeptime -= 1
if __name__ == '__main__':
    # standalone mode: lets start() run a single worker inline
    _run_standalone = True

    config.load()

    w = Proxywatchd()
    try:
        w.start()
        w.run()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to end the daemon; fall through to shutdown
        pass
    finally:
        w.stop()
        w.finish()