diff --git a/config.ini.sample b/config.ini.sample index f675586..b7c9a02 100644 --- a/config.ini.sample +++ b/config.ini.sample @@ -4,17 +4,11 @@ database = proxylist.sqlite proxy_max_fail = 5 [watcherd] -enabled = true proxy_file = false -checktime = 1800 threads = 10 timeout = 15 -read_timeout = 20 -max_fail = 5 [proxyfind] -enabled = true search = true -maxfail = 10 timeout = 30 threads = 3 diff --git a/config.py b/config.py new file mode 100644 index 0000000..421c064 --- /dev/null +++ b/config.py @@ -0,0 +1,32 @@ +from ConfigParser import SafeConfigParser + +_loaded = False + +def load(): + if _loaded: return + global database, maxfail, search, torhosts, watchd_threads, checktime, timeout, read_timeout + + ## read the config files + parser = SafeConfigParser() + parser.read('config.ini') + + database = parser.get('global', 'database') + maxfail = parser.getint('global', 'proxy_max_fail') + + search = parser.getboolean('proxyfind', 'search') + + torhosts = [ str(i).strip() for i in parser.get('global', 'tor_host').split(',') ] + watchd_threads = parser.getint('watcherd', 'threads') + timeout = parser.getint('watcherd', 'timeout') + + # allow overriding select items from the commandline + import argparse + aparse = argparse.ArgumentParser() + aparse.add_argument('--watchd_threads', help="how many proxy checker threads to spin up, 0==none, default: 10", type=int, default=watchd_threads, required=False) + args = aparse.parse_args() + + watchd_threads = args.watchd_threads + + global servers + with open('servers.txt', 'r') as handle: + servers = handle.read().split('\n') diff --git a/ppf.py b/ppf.py index a91a4de..c87bf48 100755 --- a/ppf.py +++ b/ppf.py @@ -6,13 +6,13 @@ import random, time import re import urllib import hashlib -from ConfigParser import SafeConfigParser from requests.packages.urllib3.exceptions import InsecureRequestWarning requests.packages.urllib3.disable_warnings(InsecureRequestWarning) import mysqlite import proxywatchd from misc import _log from soup_parser import soupify +import config base_header = { 'Accept-Language':'en-US,en;q=0.8', @@ -24,7 +24,6 @@ base_header = { searx_instances = ('https://searx.me', 'https://searx.xyz', 'https://searx.site', 'https://searx.win', 'https://searx.ru', 'https://stemy.me/searx', 'https://searx.at', 'https://listi.me', 'https://searx.dk', 'https://searx.laquadrature.net' ) retry_messages = ('Engines cannot retrieve results', 'Rate limit exceeded') -CONFIG = 'config.ini' def cleanhtml(raw_html): cleanr = re.compile('<.*?>') @@ -45,6 +44,7 @@ def import_from_file(fn, sqlite): def fetch_contents(uri): headers = base_header try: resp = requests.get(uri, timeout=45, headers=headers, verify=False, proxies=proxies) + except KeyboardInterrupt as e: raise e except: return '' data = resp.text @@ -69,7 +69,7 @@ def insert_proxies(proxies, uri, sqlite): def proxyfind(sqlite = None): #print('entering proxyfind...') - if not sqlite: sqlite = mysqlite.mysqlite(database,str) + if not sqlite: sqlite = mysqlite.mysqlite(config.database,str) uris = [ i[0] for i in sqlite.execute('SELECT url FROM uris WHERE error=0 and url not like "%github%" ORDER BY RANDOM() LIMIT 10').fetchall() ] @@ -113,6 +113,7 @@ def proxyleech(sqlite, rows): for row in rows: try: content = fetch_contents(row[0]) + except KeyboardInterrupt as e: raise e except: content = '' uniques = [] @@ -120,6 +121,7 @@ def proxyleech(sqlite, rows): if p in uniques: continue try: if not is_reserved_ipv4(p.split(':')[0]): uniques.append(p) + except KeyboardInterrupt as e: raise e except: pass @@ -149,16 +151,10 @@ def proxyleech(sqlite, rows): if __name__ == '__main__': - ## read the config files - parser = SafeConfigParser() - parser.read(CONFIG) + config.load() + proxies={'http':'socks4://%s' % random.choice(config.torhosts),'https':'socks4://%s' % random.choice(config.torhosts)} - database = parser.get('global', 'database') - search = parser.getboolean('proxyfind', 'search') - tor_hosts = parser.get('global', 'tor_host').split(',') - proxies={'http':'socks4://%s' % random.choice(tor_hosts),'https':'socks4://%s' % random.choice(tor_hosts)} - - sqlite = mysqlite.mysqlite(database, str) + sqlite = mysqlite.mysqlite(config.database, str) ## create dbs if required sqlite.execute('CREATE TABLE IF NOT EXISTS uris (added INT, url TEXT, check_time INT, error INT, driver INT, hash TEXT)') @@ -178,7 +174,11 @@ if __name__ == '__main__': empty = [ urignore.append(i.split('/')[2]) for i in searx_instances ] # start proxy watcher - watcherd = proxywatchd.Proxywatchd(CONFIG) if parser.getboolean('watcherd', 'enabled') else None + if config.watchd_threads > 0: + watcherd = proxywatchd.Proxywatchd() + watcherd.start() + else: + watcherd = None while True: try: @@ -186,13 +186,14 @@ if __name__ == '__main__': rows = [ [i[0],i[1],i[2]] for i in sqlite.execute('SELECT url,hash,error FROM uris WHERE (check_time= 180: - _log('Proxywatchd threads: %d/%d' % (len(threads), self.maxthreads)) - self.echoise = time.time() - - self.mysqlite.close() +class WorkerJob(): + def __init__(self, proxy, proto, failcount): + self.proxy = proxy + self.proto = proto + self.failcount = failcount + self.nextcheck = None + self.duration = None def is_drone_bl(self, proxy): p = proxy.split(':')[0] - proxies = {'http':'socks4://%s:%s@%s' % (p,p,random.choice(self.torhosts))} + proxies = {'http':'socks4://%s:%s@%s' % (p,p,random.choice(config.torhosts))} resp = requests.get('http://dronebl.org/lookup?ip=%s' % p, proxies=proxies) if 'No incidents regarding' in resp.text: return 0 else: return 1 - def connect_socket(self, proxy, servers, proto = None): - protos = ['http', 'socks5', 'socks4'] if proto is None else proto + def connect_socket(self): + protos = ['http', 'socks5', 'socks4'] if self.proto is None else self.proto for proto in protos: - torhost = random.choice(self.torhosts) + torhost = random.choice(config.torhosts) duration = time.time() - proxies = [ rocksock.RocksockProxyFromURL('socks4://%s' % torhost), - rocksock.RocksockProxyFromURL('%s://%s' % (proto, proxy[0])), - ] + proxies = [ + rocksock.RocksockProxyFromURL('socks4://%s' % torhost), + rocksock.RocksockProxyFromURL('%s://%s' % (proto, self.proxy)), + ] - srv = random.choice(servers).strip() + srv = random.choice(config.servers).strip() try: - sock = rocksock.Rocksock(host=srv, port=6697, ssl=True, proxies=proxies, timeout=self.timeout) + sock = rocksock.Rocksock(host=srv, port=6697, ssl=True, proxies=proxies, timeout=config.timeout) sock.connect() sock.send('%s\n' % random.choice(['NICK', 'USER', 'JOIN', 'MODE', 'PART', 'INVITE', 'KNOCK', 'WHOIS', 'WHO', 'NOTICE', 'PRIVMSG', 'PING', 'QUIT'])) return sock, proto, duration, torhost, srv - + except KeyboardInterrupt as e: + raise(e) except: sock.disconnect() return None, None, None, None, None - def daemon(self, servers): - sqlite = mysqlite.mysqlite(self.database, str) - threadid = ''.join( [ random.choice(string.letters) for x in range(5) ] ) + def run(self): + self.nextcheck = (time.time() + 1800 + ((1+int(self.failcount)) * 3600)) - q = 'SELECT proxy,failed,country,proto FROM proxylist WHERE failed 0.0: jpt += 1 + for tid in range(config.watchd_threads): + self.threads[tid].add_jobs(self.jobs[tid*jpt:tid*jpt+jpt]) + self.jobs = [] + + if not self.in_background: # single_thread scenario + self.threads[0].workloop() + + self.collect_work() + + if len(self.collected) > self.submit_after: + self.submit_collected() + + time.sleep(1) + + +if __name__ == '__main__': + _run_standalone = True + + config.load() + + w = Proxywatchd() + try: + w.start() + w.run() + except KeyboardInterrupt as e: + raise e + finally: + w.stop() + w.finish()