420 lines
12 KiB
Python
420 lines
12 KiB
Python
#!/usr/bin/env python
|
|
|
|
import threading
|
|
import time, random, string, re, copy
|
|
from geoip import geolite2
|
|
|
|
from config import Config
|
|
|
|
import mysqlite
|
|
from misc import _log
|
|
import rocksock
|
|
|
|
# Module-wide configuration object; config.load() is invoked on it by
# Proxywatchd.__init__ and by the __main__ guard.
config = Config()

# Set to True by the __main__ guard when this file is executed as a script.
_run_standalone = False

# Cache of hostname -> resolved address, filled by socks4_resolve().
cached_dns = {}
|
|
|
|
def try_div(a, b):
    """Divide ``a`` by ``b`` using float division; yield 0 when ``b`` is zero."""
    if b == 0:
        return 0
    return a / float(b)
|
|
|
|
def socks4_resolve(srvname, server_port):
    """Resolve ``srvname`` to an address usable with plain SOCKS4.

    Successful lookups are stored in the module-level ``cached_dns`` dict and
    served from there on subsequent calls.

    Args:
        srvname: host name of the connection target.
        server_port: port passed along to ``rocksock.RocksockHostinfo``.

    Returns:
        The first element of the address tuple returned by
        ``rocksock.resolve`` (requested with ``want_v4=True``), or ``False``
        when the name could not be resolved.
    """
    if srvname in cached_dns:
        srv = cached_dns[srvname]
        if config.watchd.debug:
            # This is a module-level function, so there is no ``self`` (and
            # therefore no proxy) to mention in the log line.
            _log("using cached ip (%s) for %s" % (srv, srvname), "debug")
        return srv

    try:
        af, sa = rocksock.resolve(rocksock.RocksockHostinfo(srvname, server_port), want_v4=True)
    except rocksock.RocksockException as e:
        # Only a name-resolution (GAI) error is expected from resolve().
        assert(e.get_errortype() == rocksock.RS_ET_GAI)
        sa = None

    if sa is None:
        _log("could not resolve connection target %s" % srvname, "ERROR")
        return False

    cached_dns[srvname] = sa[0]
    return sa[0]
|
|
|
|
|
|
class WorkerJob():
    """A single proxy check and the bookkeeping values written back to the DB.

    The job connects through a tor host and then through the proxy under test
    to one of ``config.servers`` (ports 6667/6697 - presumably IRC servers),
    sends ``NICK`` and inspects the first bytes of the reply.
    """

    def __init__(self, proxy, proto, failcount, success_count, total_duration, country):
        self.proxy = proxy                    # "host:port" string of the proxy under test
        self.proto = proto                    # known protocol, or None to probe all of them
        self.failcount = failcount
        self.checktime = None                 # set to the start time (epoch seconds) by run()
        self.success_count = success_count
        self.total_duration = total_duration  # accumulated in milliseconds by run()
        self.country = country

    def connect_socket(self):
        """Try to reach a random server through tor and the proxy.

        Every candidate protocol (``http``, ``socks5``, ``socks4``, or only
        ``self.proto`` when it is already known) is tried in turn until one
        connects.

        Returns:
            On success ``(sock, proto, start_time, torhost, srvname, 0)``,
            where ``start_time`` is the ``time.time()`` value taken right
            before connecting.  On failure
            ``(None, None, None, None, None, fail_inc)`` where ``fail_inc`` is
            the amount the caller should add to the job's fail counter.
        """
        srvname = random.choice(config.servers).strip()
        protos = ['http', 'socks5', 'socks4'] if self.proto is None else [self.proto]
        use_ssl = random.choice([0,1]) if config.watchd.use_ssl == 2 else config.watchd.use_ssl
        server_port = 6697 if use_ssl else 6667

        fail_inc = 1

        for proto in protos:
            torhost = random.choice(config.torhosts)
            # socks4 (without 4a) requires a raw ip address
            # rocksock automatically resolves if needed, but it's more
            # efficient to cache the result.
            if proto == 'socks4':
                srv = socks4_resolve(srvname, server_port)
            else:
                srv = srvname
            ## skip socks4 failed resolution
            if not srv:
                continue

            duration = time.time()
            proxies = [
                rocksock.RocksockProxyFromURL('socks4://%s' % torhost),
                rocksock.RocksockProxyFromURL('%s://%s' % (proto, self.proxy)),
            ]

            # Reset per iteration so the error handler never touches an
            # unbound name or the socket of a previous attempt.
            sock = None
            try:
                sock = rocksock.Rocksock(host=srv, port=server_port, ssl=use_ssl, proxies=proxies, timeout=config.watchd.timeout)
                sock.connect()
                sock.send('NICK\n')
                return sock, proto, duration, torhost, srvname, 0
            except rocksock.RocksockException as e:
                if config.watchd.debug:
                    _log("proxy failed: %s://%s: %s"%(proto, self.proxy, e.get_errormessage()), 'debug')

                et = e.get_errortype()
                err = e.get_error()
                # presumably the index of the failing hop in ``proxies``
                # (0 = tor host, 1 = proxy under test) - confirm in rocksock
                fp = e.get_failedproxy()

                if sock is not None:
                    sock.disconnect()

                if et == rocksock.RS_ET_OWN:
                    # The parentheses matter: ``and`` binds tighter than
                    # ``or``, so without them a timeout on *any* hop would
                    # abort the protocol loop.
                    if fp == 1 and (err == rocksock.RS_E_REMOTE_DISCONNECTED or
                                    err == rocksock.RS_E_HIT_TIMEOUT):
                        # proxy is not online, so don't waste time trying all possible protocols
                        break
                    elif fp == 0 and err == rocksock.RS_E_TARGET_CONN_REFUSED:
                        # Hop 0 refused the connection: do not count this
                        # against the proxy under test.
                        fail_inc = 0
                        # ``//`` keeps the bound an integer on every Python version.
                        if random.randint(0, (config.watchd.threads-1)//2) == 0:
                            _log("could not connect to proxy 0, sleep 5s", "ERROR")
                            time.sleep(5)
                elif et == rocksock.RS_ET_GAI:
                    # The original author treats this as "cannot happen"; the
                    # lines below only run when asserts are disabled (-O).
                    assert(0)
                    fail_inc = 0
                    _log("could not resolve connection target %s"%srvname, "ERROR")
                    break

        return None, None, None, None, None, fail_inc

    def run(self):
        """Execute the check and update this job's counters in place.

        ``checktime`` is always set.  If no connection could be made,
        ``failcount`` grows by the increment reported by ``connect_socket``.
        If the reply starts with ``:`` or ``NOTICE`` the job is marked good:
        ``failcount`` is reset, ``success_count`` and ``total_duration`` are
        increased and the country is looked up when it is still unknown.  A
        reply that does not match leaves all counters untouched.  A rocksock
        error while receiving adds one to ``failcount``.
        """
        self.checktime = int(time.time())

        sock, proto, duration, tor, srv, failinc = self.connect_socket()
        if not sock:
            self.failcount += failinc
            return
        try:
            recv = sock.recv(6)

            # good data
            if re.match('^(:|NOTICE)', recv, re.IGNORECASE):
                # ``duration`` held the start timestamp until now
                duration = (time.time() - duration)

                if not self.country or self.country == 'unknown' or self.country == 'N/A':
                    match = geolite2.lookup(self.proxy.split(':')[0])
                    if match is not None:
                        self.country = match.country
                    else:
                        self.country = 'N/A'

                self.proto = proto
                self.failcount = 0
                self.success_count = self.success_count + 1
                self.total_duration += int(duration*1000)
                torstats = "" if len(config.torhosts)==1 else ' tor: %s;'%tor
                # replace whitespace/control/non-printable characters by dots
                recvstats = "".join([x if x in string.printable and ord(x) > 32 else '.' for x in recv])
                _log('%s://%s (%s) d: %.2f sec(s);%s srv: %s; recv: %s' % (proto, self.proxy, self.country, duration, torstats, srv, recvstats), 'xxxxx')
        except rocksock.RocksockException as e:
            self.failcount += 1
        finally:
            sock.disconnect()
|
|
|
|
|
|
class WorkerThread():
    """A worker that runs queued jobs, optionally on its own thread.

    ``workqueue`` holds jobs waiting to run and ``workdone`` holds finished
    jobs waiting to be collected.  Both lists are only swapped or modified
    while holding ``self.lock`` so that no job is lost between the worker
    thread and the thread that feeds/collects it.
    """

    def __init__ (self, id):
        self.id = id                    # label used as the log source of the final statistics
        self.done = threading.Event()   # set by stop() to make workloop() exit
        self.thread = None              # stays None when workloop() is driven by the caller
        self.workqueue = []
        self.workdone = []
        self.lock = threading.Lock()

    def stop(self):
        """Ask the work loop to exit after its current job."""
        self.done.set()

    def term(self):
        """Wait for the worker thread to finish, if one was started."""
        if self.thread: self.thread.join()

    def add_jobs(self, jobs):
        """Append ``jobs`` to the queue of pending work."""
        with self.lock:
            self.workqueue.extend(jobs)

    def return_jobs(self):
        """Remove and return all jobs that have not been started yet."""
        with self.lock:
            jobs = self.workqueue
            self.workqueue = []
        return jobs

    def jobcount(self):
        """Return the number of pending jobs (unlocked snapshot)."""
        return len(self.workqueue)

    def collect(self):
        """Remove and return all finished jobs.

        The list is swapped under the lock; previously a job appended by the
        worker between the copy and the reset was silently dropped.
        """
        with self.lock:
            wd = self.workdone
            self.workdone = []
        return wd

    def start_thread(self):
        """Run workloop() on a new thread."""
        self.thread = threading.Thread(target=self.workloop)
        self.thread.start()

    def pop_if_possible(self):
        """Return the most recently queued job, or None if the queue is empty."""
        with self.lock:
            if len(self.workqueue):
                job = self.workqueue.pop()
            else:
                job = None
        return job

    def workloop(self):
        """Run queued jobs until stopped.

        With a thread attached the loop keeps polling for new jobs until
        ``stop()`` is called and then logs success/timing statistics.
        Without a thread (the caller drives the loop itself) it returns as
        soon as the queue is empty.
        """
        success_count = 0
        job_count = 0
        duration_total = 0
        duration_success_total = 0
        while True:
            job = self.pop_if_possible()
            if job:
                nao = time.time()
                job.run()
                spent = time.time() - nao
                if job.failcount == 0:
                    duration_success_total += spent
                    success_count += 1
                job_count += 1
                duration_total += spent
                # same lock as collect(), so the append cannot race the swap
                with self.lock:
                    self.workdone.append(job)
            elif not self.thread:
                break
            if self.done.is_set(): break
            time.sleep(0.01)
        if self.thread:
            succ_rate = try_div(success_count, job_count)*100
            avg_succ_t = try_div(duration_success_total, success_count)
            avg_fail_t = try_div(duration_total-duration_success_total, job_count-success_count)
            avg_t = try_div(duration_total, job_count)
            _log("terminated, %d/%d (%.2f%%), avg.time S/F/T %.2f, %.2f, %.2f" \
                % (success_count, job_count, succ_rate, avg_succ_t, avg_fail_t, avg_t) \
                , self.id)
|
|
|
|
class Proxywatchd():
    """Scheduler that periodically re-checks the proxies stored in the database.

    It reads candidate rows from the ``proxylist`` table, turns them into
    ``WorkerJob`` objects, distributes those over ``WorkerThread`` workers and
    writes the results back once enough of them have been collected.
    """

    def stop(self):
        """Signal the main loop to shut down."""
        _log('halting... (%d thread(s))' % len(self.threads), 'watchd')
        self.stopping.set()

    def _cleanup(self):
        """Stop and join all workers, flush their results and mark us stopped."""
        for wt in self.threads:
            wt.stop()
        for wt in self.threads:
            wt.term()
        self.collect_work()
        self.submit_collected()
        self.stopped.set()

    def finish(self):
        """Wait for shutdown to complete and log the overall totals.

        In foreground mode the cleanup is done here; in background mode the
        ``_run`` thread performs it and this method only waits for it.
        """
        if not self.in_background: self._cleanup()
        while not self.stopped.is_set(): time.sleep(0.1)
        success_rate = try_div(self.totals['success'], self.totals['submitted']) * 100
        _log("total results: %d/%d (%.2f%%)"%(self.totals['success'], self.totals['submitted'], success_rate), "watchd")

    def _prep_db(self):
        """Open the database configured as ``config.watchd.database``."""
        self.mysqlite = mysqlite.mysqlite(config.watchd.database, str)

    def _close_db(self):
        """Close the database handle if one is open."""
        if self.mysqlite:
            self.mysqlite.close()
            self.mysqlite = None

    def __init__(self):
        config.load()
        self.in_background = False
        self.threads = []
        self.stopping = threading.Event()
        self.stopped = threading.Event()
        # Defined up front so that no method can hit a missing attribute;
        # prepare_jobs() refreshes tor_safeguard on every round.
        self.mysqlite = None
        self.tor_safeguard = config.watchd.tor_safeguard

        # create table if needed
        self._prep_db()
        self.mysqlite.execute('CREATE TABLE IF NOT EXISTS proxylist (proxy BLOB, country BLOB, added INT, failed INT, tested INT, source BLOB, dronebl INT, proto TEXT, success_count INT, total_duration INT)')
        self.mysqlite.commit()
        self._close_db()

        self.submit_after = config.watchd.submit_after # number of collected jobs before writing db
        self.jobs = []        # jobs waiting to be handed to a worker
        self.collected = []   # finished jobs waiting to be written to the db
        self.totals = {
            'submitted':0,
            'success':0,
        }

    def fetch_rows(self):
        """Return the rows of the proxies that are due for a check.

        The first query selects proxies with ``0 <= failed < max_fail`` whose
        ``tested + checktime + failed * perfail_checktime`` lies in the past,
        in random order.  If that yields fewer rows than there are threads
        and ``config.watchd.oldies`` is set, the tor safeguard is switched off
        and proxies with ``max_fail <= failed < 2 * max_fail`` are selected
        instead, limited to ``threads * oldies_multi`` rows.

        Expects an open database handle (see ``_prep_db``).
        """
        q = 'SELECT proxy,proto,failed,success_count,total_duration,country FROM proxylist WHERE failed >= ? and failed < ? and (tested + ? + (failed * ?)) < ? ORDER BY RANDOM()'
        rows = self.mysqlite.execute(q, (0, config.watchd.max_fail, config.watchd.checktime, config.watchd.perfail_checktime, time.time())).fetchall()
        # check oldies ?
        if len(rows) < config.watchd.threads and config.watchd.oldies:
            ## disable tor safeguard for old proxies
            if self.tor_safeguard: self.tor_safeguard = False
            q += ' LIMIT ?'
            rows = self.mysqlite.execute(q, (config.watchd.max_fail, config.watchd.max_fail*2, config.watchd.checktime, config.watchd.oldies_checktime, time.time(), config.watchd.threads*config.watchd.oldies_multi)).fetchall()
        return rows

    def prepare_jobs(self):
        """Load the proxies that are due and queue one job for each of them."""
        self._prep_db()
        ## enable tor safeguard by default
        self.tor_safeguard = config.watchd.tor_safeguard
        rows = self.fetch_rows()
        for row in rows:
            job = WorkerJob(row[0], row[1], row[2], row[3], row[4], row[5])
            self.jobs.append(job)
        self._close_db()

    def collect_work(self):
        """Move the finished jobs of all workers into ``self.collected``."""
        for wt in self.threads:
            self.collected.extend(wt.collect())

    def collect_unfinished(self):
        """Take back the not-yet-started jobs of all workers into ``self.jobs``."""
        for wt in self.threads:
            jobs = wt.return_jobs()
            self.jobs.extend(jobs)
        if len(self.jobs):
            _log("collected %d unfinished jobs"%len(self.jobs), "watchd")

    def submit_collected(self):
        """Write the collected job results to the database.

        When at least 100 results were collected, the success rate is at or
        below ``config.watchd.outage_threshold`` and the tor safeguard is
        active, the failures are assumed to be caused by tor: only the
        successful results are written, and nothing at all (the collected
        list is kept) when there was no success.

        Returns:
            ``True`` when there was nothing to do or everything was written,
            ``False`` when the safeguard held results back.
        """
        if len(self.collected) == 0: return True

        successes = [job for job in self.collected if job.failcount == 0]
        sc = len(successes)
        success_rate = (float(sc) / len(self.collected)) * 100

        ret = True
        to_submit = self.collected
        if len(self.collected) >= 100 and success_rate <= config.watchd.outage_threshold and self.tor_safeguard:
            _log("WATCHD %.2f%% SUCCESS RATE - tor circuit blocked? won't submit fails"%success_rate, "ERROR")
            if sc == 0: return False
            to_submit = successes
            ret = False

        # parameter order matches the placeholders of the UPDATE below;
        # the constant 1 is the value stored in the dronebl column
        args = [
            (job.failcount, job.checktime, 1, job.country, job.proto, job.success_count, job.total_duration, job.proxy)
            for job in to_submit
        ]

        # report the number of rows actually written, which is smaller than
        # len(self.collected) when the safeguard filtered out the failures
        _log("updating %d DB entries (success rate: %.2f%%)"%(len(args), success_rate), 'watchd')
        self._prep_db()
        query = 'UPDATE proxylist SET failed=?,tested=?,dronebl=?,country=?,proto=?,success_count=?,total_duration=? WHERE proxy=?'
        self.mysqlite.executemany(query, args)
        self.mysqlite.commit()
        self._close_db()
        self.collected = []
        self.totals['submitted'] += len(args)
        self.totals['success'] += sc
        return ret

    def start(self):
        """Run in the foreground for a standalone single-thread setup, else in the background."""
        if config.watchd.threads == 1 and _run_standalone:
            return self._run()
        else:
            return self._run_background()

    def run(self):
        """Block the calling thread forever while the watchdog runs in the background."""
        if self.in_background:
            while 1: time.sleep(1)

    def _run_background(self):
        """Start the main loop on its own thread."""
        self.in_background = True
        t = threading.Thread(target=self._run)
        t.start()

    def _run(self):
        """Main loop: create the workers, then feed them and collect results until stopped."""
        _log('starting...', 'watchd')

        for i in range(config.watchd.threads):
            threadid = ''.join( [ random.choice(string.letters) for x in range(5) ] )
            wt = WorkerThread(threadid)
            if self.in_background:
                wt.start_thread()
            self.threads.append(wt)
            time.sleep( (random.random()/100) )

        # number of one-second naps left before the next scheduling round
        sleeptime = 0

        while True:

            if self.stopping.is_set():
                if self.in_background: self._cleanup()
                break

            if sleeptime == 0:
                sleeptime = 1
            else:
                time.sleep(1)
                sleeptime -= 1
                continue

            # probe one random worker; an empty queue there triggers a refill
            if random.choice(self.threads).jobcount() == 0:
                self.collect_unfinished()
                if not len(self.jobs):
                    self.collect_work()
                    if not self.submit_collected() and self.tor_safeguard:
                        _log("zzZzZzzZ sleeping 1 minute(s) due to tor issues - consider decreasing thread number!", "watchd")
                        self.collect_unfinished()
                        sleeptime = 1*60
                    else:
                        self.prepare_jobs()
                else:
                    if len(self.jobs) < len(self.threads):
                        # allow threads enough time to consume the jobs
                        sleeptime = 10
                # NOTE(review): nesting reconstructed from a listing without
                # indentation - the hand-out is assumed to run for both
                # branches above; confirm against the original file.
                if len(self.jobs):
                    njobs = len(self.jobs)
                    nthreads = len(self.threads)
                    _log("handing out %d jobs to %d thread(s)"% (njobs, nthreads), 'watchd')
                    # jobs per thread, rounded up (pure integer arithmetic)
                    jpt = (njobs + nthreads - 1) // nthreads
                    for tid, wt in enumerate(self.threads):
                        wt.add_jobs(self.jobs[tid*jpt:tid*jpt+jpt])
                    self.jobs = []

            if not self.in_background: # single_thread scenario
                self.threads[0].workloop()

            self.collect_work()

            if len(self.collected) > self.submit_after:
                if not self.submit_collected() and self.tor_safeguard:
                    _log("zzZzZzzZ sleeping 1 minute(s) due to tor issues - consider decreasing thread number!", "watchd")
                    self.collect_unfinished()
                    sleeptime = 1*60

            time.sleep(1)
            sleeptime -= 1
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: flag standalone mode, load the configuration and
    # keep the watchdog running until it is interrupted from the keyboard.
    _run_standalone = True

    config.load()

    watchd = Proxywatchd()
    try:
        watchd.start()
        watchd.run()
    except KeyboardInterrupt:
        # Ctrl-C is the regular way to end the program.
        pass
    finally:
        # Always request shutdown and wait for results to be flushed.
        watchd.stop()
        watchd.finish()
|