# comboparse.py
#
# Configuration items that are declared once and can then be supplied by an
# INI file and/or overridden on the command line as ``--section.name VALUE``.

try:  # Python 3: ConfigParser supersedes the deprecated SafeConfigParser
    from configparser import ConfigParser as SafeConfigParser
    from configparser import Error as ConfigParserError
    from configparser import NoOptionError, NoSectionError
except ImportError:  # Python 2
    from ConfigParser import SafeConfigParser, NoOptionError, NoSectionError
    from ConfigParser import Error as ConfigParserError
from argparse import ArgumentParser, ArgumentTypeError
import sys

# Spellings accepted for booleans on the command line; the same words that
# ConfigParser.getboolean() accepts in the INI file.
_BOOLEAN_WORDS = {
    '1': True, 'yes': True, 'true': True, 'on': True,
    '0': False, 'no': False, 'false': False, 'off': False,
}


def _parse_bool(text):
    """Convert a command-line word to a bool.

    Plain ``bool`` must not be used as an argparse ``type``: every non-empty
    string (including "false") is truthy.
    """
    try:
        return _BOOLEAN_WORDS[text.strip().lower()]
    except KeyError:
        raise ArgumentTypeError('not a boolean: %r' % (text,))


class _Dummy():
    """Empty attribute holder; one instance is created per config section."""
    pass


class ComboParser(object):
    """Reads declared items from an INI file, with command-line overrides.

    After :meth:`load`, each item is available as ``self.<section>.<name>``.
    """

    def __init__(self, ini):
        """ini: path of the INI file handed to the config parser on load()."""
        self.cparser = SafeConfigParser()
        self.aparser = ArgumentParser()
        self.ini = ini
        self.items = []
        self.loaded = False

    def add_item(self, section, name, type, default, desc, required):
        """Declare one config item and its ``--section.name`` option.

        section/name -- where the value lives in the INI file and on self
        type         -- bool, float, int, str, or another callable that
                        converts the raw string
        default      -- value used when neither INI nor command line has it
        desc         -- help text shown by ``--help``
        required     -- if true, load() exits when the item is found nowhere
        """
        self.items.append({
            'section': section,
            'name': name,
            'type': type,
            'default': default,
            'required': required,
        })
        self.aparser.add_argument(
            '--%s.%s' % (section, name),
            help='%s, default: (%s)' % (desc, str(default)),
            # bool("false") is True, so booleans need a real converter
            type=_parse_bool if type is bool else type,
            # None means "not given"; requiredness is enforced in load(),
            # because the INI file may supply the value instead
            default=None,
            required=False
        )

    def _read_ini(self, section, name, kind):
        """Return the INI value converted to `kind`.

        Raises NoSectionError / NoOptionError when the value is absent.
        """
        if kind is bool:
            return self.cparser.getboolean(section, name)
        if kind is float:
            return self.cparser.getfloat(section, name)
        if kind is int:
            return self.cparser.getint(section, name)
        raw = self.cparser.get(section, name)
        if kind is str or kind is None:
            return raw
        # any other converter gets the raw string, like argparse does
        return kind(raw)

    def load(self):
        """Parse INI file and command line and populate the section objects.

        Runs only once; later calls return immediately. Precedence is
        command line over INI file over declared default. Writes a warning
        to stderr for every optional item that falls back to its default,
        and exits with status 1 when a required item is found nowhere.
        """
        if self.loaded:
            return
        self.loaded = True

        # Reading stays best-effort, but a broken file is no longer silent.
        try:
            self.cparser.read(self.ini)
        except (ConfigParserError, IOError, OSError, UnicodeError) as exc:
            sys.stderr.write('warning: could not read "%s": %s\n' % (self.ini, exc))
        args = self.aparser.parse_args()

        for item in self.items:
            section, name = item['section'], item['name']
            try:
                obj = getattr(self, section)
            except AttributeError:
                obj = _Dummy()
                setattr(self, section, obj)

            setattr(obj, name, item['default'])
            value = item['default']

            item['found'] = True
            try:
                value = self._read_ini(section, name, item['type'])
            except (NoOptionError, NoSectionError):
                # a missing section is the same situation as a missing option
                item['found'] = False

            arg = getattr(args, '%s.%s' % (section, name), None)
            if arg is not None:
                value = arg
                item['found'] = True

            if not item['found']:
                if item['required']:
                    sys.stderr.write('error: required config item "%s" not found in section "%s" of "%s"!\n' % (name, section, self.ini))
                    sys.exit(1)
                else:
                    sys.stderr.write('warning: assigned default value of "%s" to "%s.%s"\n' % (str(item['default']), section, name))
            setattr(obj, name, value)


# TEST CODE
def _main():
    config = ComboParser('config.ini')
    config.add_item('watchd', 'debug', bool, False, 'turn additional debug info on', False)
    config.add_item('watchd', 'float', float, 0.1, 'a float test', True)
    config.add_item('watchd', 'strupp', str, "sup", 'a str test', False)
    config.add_item('common', 'tor_host', str, '127.0.0.1:9050', 'address of tor proxy', True)
    config.load()
    # single-argument call form prints identically on Python 2 and 3
    print(config.watchd.debug)
    print(config.watchd.float)
    print(config.watchd.strupp)
    print(config.common.tor_host)


if __name__ == '__main__':
    _main()
# config.py
from comboparse import ComboParser


class Config(ComboParser):
    """Declares every config item of the application on top of ComboParser.

    After load(), ``torhosts`` holds the individual entries of
    ``common.tor_hosts`` and ``servers`` holds the non-blank lines of
    ``servers.txt``.
    """

    def __init__(self):
        super(Config, self).__init__('config.ini')
        self.add_item('common', 'tor_hosts', str, '127.0.0.1:9050', 'comma-separated list of tor proxy address(es)', True)
        self.add_item('common', 'database', str, 'proxylist.sqlite', 'filename of database', True)

        self.add_item('watchd', 'max_fail', int, 5, 'number of fails after which a proxy is considered dead', False)
        self.add_item('watchd', 'threads', int, 10, 'number of threads watchd uses to check proxies', True)
        self.add_item('watchd', 'timeout', int, 15, 'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False)
        self.add_item('watchd', 'submit_after', int, 200, 'min. number of tested proxies for DB write', False)
        self.add_item('watchd', 'debug', bool, False, 'whether to print additional debug info', False)
        self.add_item('watchd', 'use_ssl', bool, False, 'whether to use SSL and port 6697 to connect to targets (slower)', False)

        self.add_item('ppf', 'search', bool, True, 'whether to use searx search engine to find new proxy lists', False)
        self.add_item('ppf', 'timeout', float, 15, 'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False)
        self.add_item('ppf', 'checktime', int, 3600, 'base checking interval for urls in db in seconds', False)
        self.add_item('ppf', 'perfail_checktime', int, 3600, 'additional checking interval for urls in db in seconds per experienced failure', False)

    def load(self):
        """Load all items, then derive ``torhosts`` and ``servers``.

        Raises whatever open() raises when ``servers.txt`` cannot be read.
        """
        super(Config, self).load()
        # Skip empty entries so a trailing or doubled comma cannot yield an
        # empty proxy address.
        self.torhosts = [
            str(host).strip()
            for host in self.common.tor_hosts.split(',')
            if str(host).strip()
        ]
        with open('servers.txt', 'r') as handle:
            self.servers = [line.strip() for line in handle if line.strip()]
EXISTS uris (added INT, url TEXT, check_time INT, error INT, driver INT, hash TEXT)') sqlite.execute('CREATE TABLE IF NOT EXISTS proxylist (proxy BLOB, country BLOB, added INT, failed INT, tested INT, dronebl INT, proto TEXT, success_count INT, total_duration INT)') sqlite.commit() import_from_file('import.txt', sqlite) - if config._leechd.search: + if config.ppf.search: ## load search terms with open('search_terms.txt', 'r') as f: search_terms = [ i.strip() for i in f.read().split('\n') if len(i.strip()) ] @@ -175,7 +177,7 @@ if __name__ == '__main__': empty = [ urignore.append(i.split('/')[2]) for i in searx_instances ] # start proxy watcher - if config._watchd.threads > 0: + if config.watchd.threads > 0: watcherd = proxywatchd.Proxywatchd() watcherd.start() else: @@ -185,11 +187,11 @@ if __name__ == '__main__': while True: try: ## any site that needs to be checked ? - rows = [ [i[0],i[1],i[2]] for i in sqlite.execute('SELECT url,hash,error FROM uris WHERE (check_time+?+(error*?)