implement combo config parser
Allows every option to be overridden on the command line. E.g. with `[watchd] threads=10 debug=false` in config.ini, pass `--watchd.threads=50 --watchd.debug=true` to override both values.
This commit is contained in:
86
comboparse.py
Normal file
86
comboparse.py
Normal file
@@ -0,0 +1,86 @@
|
||||
from ConfigParser import SafeConfigParser, NoOptionError
|
||||
from argparse import ArgumentParser
|
||||
import sys
|
||||
|
||||
class _Dummy():
    """Empty attribute container; one instance is created per config section."""
    pass


class ComboParser(object):
    """Config reader that merges an ini file with command line overrides.

    Options are declared with add_item(). After load() every option is
    reachable as ``parser.<section>.<name>``. Precedence, highest first:
    the command line argument ``--<section>.<name>``, the value found in
    the ini file, the declared default.
    """

    # Spellings accepted for booleans on the command line; this is the same
    # set ConfigParser.getboolean() accepts for values in the ini file.
    _BOOL_WORDS = {
        '1': True, 'yes': True, 'true': True, 'on': True,
        '0': False, 'no': False, 'false': False, 'off': False,
    }

    def __init__(self, ini):
        """Prepare a parser for the ini file at path `ini`.

        Nothing is read here; the file and sys.argv are parsed by load().
        """
        self.ini = ini
        self.items = []
        self.cparser = SafeConfigParser()
        self.aparser = ArgumentParser()
        self.loaded = False

    @staticmethod
    def _parse_bool(text):
        """Convert a command line string to a bool.

        Plain ``bool`` cannot be used as an argparse type because any
        non-empty string (including "false") is truthy. Raises ValueError
        for unknown spellings, which argparse reports as a usage error.
        """
        key = text.strip().lower()
        if key not in ComboParser._BOOL_WORDS:
            raise ValueError('not a boolean: %r' % (text,))
        return ComboParser._BOOL_WORDS[key]

    def add_item(self, section, name, type, default, desc, required):
        """Declare one option.

        section  -- ini section, also the attribute name on this object
        name     -- option name inside the section
        type     -- bool, float, int, str, or any callable taking a string
        default  -- value used when neither ini file nor command line set it
        desc     -- help text shown by --help
        required -- if true, load() exits when the option is set nowhere
        """
        self.items.append({
            'section': section,
            'name': name,
            'type': type,
            'default': default,
            'required': required,
        })
        qualified = '%s.%s' % (section, name)
        self.aparser.add_argument(
            '--' + qualified,
            # explicit dest: argparse would otherwise rewrite '-' to '_'
            # and load() could no longer find the parsed value
            dest=qualified,
            help='%s, default: (%s)' % (desc, str(default)),
            type=self._parse_bool if type is bool else type,
            # None means "not given on the command line"
            default=None,
            # never required on the command line: the ini file may provide it
            required=False
        )

    def _read_ini(self, item):
        """Return the ini value of `item`, converted to the declared type."""
        section, name, kind = item['section'], item['name'], item['type']
        if kind is bool:
            return self.cparser.getboolean(section, name)
        if kind is float:
            return self.cparser.getfloat(section, name)
        if kind is int:
            return self.cparser.getint(section, name)
        raw = self.cparser.get(section, name)
        return raw if kind is str else kind(raw)

    def load(self):
        """Read the ini file and sys.argv and populate the section attributes.

        Runs only once; later calls return immediately. Writes a warning to
        stderr for every optional item that falls back to its default, and
        exits with status 1 if a required item is set nowhere.
        """
        if self.loaded:
            return
        self.loaded = True

        # Best effort: a missing file is ignored by read() itself, and a
        # malformed one must not prevent defaults and overrides from working.
        try:
            self.cparser.read(self.ini)
        except Exception as err:
            sys.stderr.write('warning: could not parse "%s": %s\n' % (self.ini, err))

        args = self.aparser.parse_args()
        for item in self.items:
            section, name = item['section'], item['name']

            if not hasattr(self, section):
                setattr(self, section, _Dummy())
            holder = getattr(self, section)

            value = item['default']
            # has_option() is False for a missing option *and* for a missing
            # section, so neither case raises.
            found = self.cparser.has_option(section, name)
            if found:
                value = self._read_ini(item)

            override = getattr(args, '%s.%s' % (section, name), None)
            if override is not None:
                value = override
                found = True

            item['found'] = found
            setattr(holder, name, value)

            if found:
                continue
            if item['required']:
                sys.stderr.write('error: required config item "%s" not found in section "%s" of "%s"!\n' % (name, section, self.ini))
                sys.exit(1)
            sys.stderr.write('warning: assigned default value of "%s" to "%s.%s"\n' % (str(item['default']), section, name))
|
||||
|
||||
|
||||
# TEST CODE
|
||||
def _main():
    """Manual smoke test: declare a few options, load them, print the results."""
    config = ComboParser('config.ini')

    # (section, name, type, default, description, required)
    declared = (
        ('watchd', 'debug', bool, False, 'turn additional debug info on', False),
        ('watchd', 'float', float, 0.1, 'a float test', True),
        ('watchd', 'strupp', str, "sup", 'a str test', False),
        ('common', 'tor_host', str, '127.0.0.1:9050', 'address of tor proxy', True),
    )
    for spec in declared:
        config.add_item(*spec)

    config.load()

    # print the values in declaration order, one per line
    for spec in declared:
        section, name = spec[0], spec[1]
        print(getattr(getattr(config, section), name))


# run the smoke test only when this file is executed directly
if __name__ == '__main__':
    _main()
|
||||
@@ -1,9 +1,8 @@
|
||||
[global]
|
||||
tor_host = 127.0.0.1:9050
|
||||
[common]
|
||||
tor_hosts = 127.0.0.1:9050
|
||||
database = proxylist.sqlite
|
||||
|
||||
[watcherd]
|
||||
proxy_file = false
|
||||
[watchd]
|
||||
max_fail = 5
|
||||
threads = 10
|
||||
timeout = 15
|
||||
@@ -11,10 +10,9 @@ submit_after = 200
|
||||
use_ssl = false
|
||||
debug = false
|
||||
|
||||
[proxyfind]
|
||||
[ppf]
|
||||
search = true
|
||||
timeout = 30
|
||||
threads = 3
|
||||
checktime = 3600
|
||||
perfail_checktime = 3600
|
||||
|
||||
|
||||
66
config.py
66
config.py
@@ -1,48 +1,24 @@
|
||||
from ConfigParser import SafeConfigParser
|
||||
from comboparse import ComboParser
|
||||
|
||||
_loaded = False
|
||||
class Config(ComboParser):
|
||||
def load(self):
|
||||
super(Config, self).load()
|
||||
self.torhosts = [ str(i).strip() for i in self.common.tor_hosts.split(',') ]
|
||||
with open('servers.txt', 'r') as handle:
|
||||
self.servers = [x.strip() for x in handle.readlines() if len(x.strip()) > 0]
|
||||
def __init__(self):
|
||||
super(Config, self).__init__('config.ini')
|
||||
self.add_item('common', 'tor_hosts', str, '127.0.0.1:9050', 'comma-separated list of tor proxy address(es)', True)
|
||||
self.add_item('common', 'database', str, 'proxylist.sqlite', 'filename of database', True)
|
||||
|
||||
class phantom():
|
||||
def __init__(self): pass
|
||||
self.add_item('watchd', 'max_fail', int, 5, 'number of fails after which a proxy is considered dead', False)
|
||||
self.add_item('watchd', 'threads', int, 10, 'number of threads watchd uses to check proxies', True)
|
||||
self.add_item('watchd', 'timeout', int, 15, 'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False)
|
||||
self.add_item('watchd', 'submit_after', int, 200, 'min. number of tested proxies for DB write', False)
|
||||
self.add_item('watchd', 'debug', bool, False, 'whether to print additional debug info', False)
|
||||
self.add_item('watchd', 'use_ssl', bool, False, 'whether to use SSL and port 6697 to connect to targets (slower)', False)
|
||||
|
||||
def load():
|
||||
if _loaded: return
|
||||
global database, maxfail, search, torhosts, watchd_threads, checktime, timeout, read_timeout, submit_after, use_ssl, url_checktime, url_perfail_checktime
|
||||
|
||||
## read the config files
|
||||
parser = SafeConfigParser()
|
||||
parser.read('config.ini')
|
||||
|
||||
database = parser.get('global', 'database')
|
||||
#maxfail = parser.getint('global', 'proxy_max_fail')
|
||||
torhosts = [ str(i).strip() for i in parser.get('global', 'tor_host').split(',') ]
|
||||
|
||||
global _watchd
|
||||
_watchd = phantom()
|
||||
_watchd.threads = parser.getint('watcherd', 'threads')
|
||||
_watchd.timeout = parser.getint('watcherd', 'timeout')
|
||||
_watchd.submit_after = parser.getint('watcherd', 'submit_after')
|
||||
_watchd.use_ssl = parser.getboolean('watcherd', 'use_ssl')
|
||||
_watchd.debug = parser.getboolean('watcherd', 'debug')
|
||||
_watchd.maxfail = parser.getint('watcherd', 'max_fail')
|
||||
|
||||
global _leechd
|
||||
_leechd = phantom()
|
||||
_leechd.checktime = parser.get('proxyfind', 'checktime')
|
||||
_leechd.perfail_checktime = parser.get('proxyfind', 'perfail_checktime')
|
||||
_leechd.search = parser.getboolean('proxyfind', 'search')
|
||||
|
||||
global watchd_debug
|
||||
watchd_debug = parser.getboolean('watcherd', 'debug')
|
||||
|
||||
# allow overriding select items from the commandline
|
||||
import argparse
|
||||
aparse = argparse.ArgumentParser()
|
||||
aparse.add_argument('--watchd_threads', help="how many proxy checker threads to spin up, 0==none, default: 10", type=int, default=_watchd.threads, required=False)
|
||||
args = aparse.parse_args()
|
||||
|
||||
_watchd.threads = args.watchd_threads
|
||||
|
||||
global servers
|
||||
with open('servers.txt', 'r') as handle:
|
||||
servers = [x.strip() for x in handle.readlines() if len(x.strip()) > 0]
|
||||
self.add_item('ppf', 'search', bool, True, 'whether to use searx search engine to find new proxy lists', False)
|
||||
self.add_item('ppf', 'timeout', float, 15, 'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False)
|
||||
self.add_item('ppf', 'checktime', int, 3600, 'base checking interval for urls in db in seconds', False)
|
||||
self.add_item('ppf', 'perfail_checktime', int, 3600, 'additional checking interval for urls in db in seconds per experienced failure', False)
|
||||
|
||||
16
ppf.py
16
ppf.py
@@ -8,10 +8,12 @@ import mysqlite
|
||||
import proxywatchd
|
||||
from misc import _log
|
||||
from soup_parser import soupify
|
||||
import config
|
||||
from config import Config
|
||||
from http2 import RsHttp, _parse_url
|
||||
import rocksock
|
||||
|
||||
config = Config()
|
||||
|
||||
base_header = {
|
||||
'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||
}
|
||||
@@ -71,7 +73,7 @@ def insert_proxies(proxies, uri, sqlite, timestamp):
|
||||
_log('+%d item(s) from %s' % (len(new), uri), 'added')
|
||||
|
||||
def proxyfind(sqlite = None):
|
||||
if not sqlite: sqlite = mysqlite.mysqlite(config.database,str)
|
||||
if not sqlite: sqlite = mysqlite.mysqlite(config.common.database,str)
|
||||
choice = random.choice(searx_instances)
|
||||
urls = []
|
||||
|
||||
@@ -157,14 +159,14 @@ if __name__ == '__main__':
|
||||
config.load()
|
||||
proxies={'http':'socks4://%s' % random.choice(config.torhosts),'https':'socks4://%s' % random.choice(config.torhosts)}
|
||||
|
||||
sqlite = mysqlite.mysqlite(config.database, str)
|
||||
sqlite = mysqlite.mysqlite(config.common.database, str)
|
||||
## create dbs if required
|
||||
sqlite.execute('CREATE TABLE IF NOT EXISTS uris (added INT, url TEXT, check_time INT, error INT, driver INT, hash TEXT)')
|
||||
sqlite.execute('CREATE TABLE IF NOT EXISTS proxylist (proxy BLOB, country BLOB, added INT, failed INT, tested INT, dronebl INT, proto TEXT, success_count INT, total_duration INT)')
|
||||
sqlite.commit()
|
||||
import_from_file('import.txt', sqlite)
|
||||
|
||||
if config._leechd.search:
|
||||
if config.ppf.search:
|
||||
## load search terms
|
||||
with open('search_terms.txt', 'r') as f:
|
||||
search_terms = [ i.strip() for i in f.read().split('\n') if len(i.strip()) ]
|
||||
@@ -175,7 +177,7 @@ if __name__ == '__main__':
|
||||
empty = [ urignore.append(i.split('/')[2]) for i in searx_instances ]
|
||||
|
||||
# start proxy watcher
|
||||
if config._watchd.threads > 0:
|
||||
if config.watchd.threads > 0:
|
||||
watcherd = proxywatchd.Proxywatchd()
|
||||
watcherd.start()
|
||||
else:
|
||||
@@ -185,11 +187,11 @@ if __name__ == '__main__':
|
||||
while True:
|
||||
try:
|
||||
## any site that needs to be checked ?
|
||||
rows = [ [i[0],i[1],i[2]] for i in sqlite.execute('SELECT url,hash,error FROM uris WHERE (check_time+?+(error*?) <?) ORDER BY RANDOM() LIMIT 25', (config._leechd.checktime, config._leechd.perfail_checktime, time.time())).fetchall() ]
|
||||
rows = [ [i[0],i[1],i[2]] for i in sqlite.execute('SELECT url,hash,error FROM uris WHERE (check_time+?+(error*?) <?) ORDER BY RANDOM() LIMIT 25', (config.ppf.checktime, config.ppf.perfail_checktime, time.time())).fetchall() ]
|
||||
|
||||
if len(rows): proxyleech(sqlite,rows)
|
||||
## search for new website during free time
|
||||
elif config._leechd.search: proxyfind(sqlite)
|
||||
elif config.ppf.search: proxyfind(sqlite)
|
||||
## sleep
|
||||
else: time.sleep(10)
|
||||
|
||||
|
||||
@@ -4,12 +4,14 @@ import threading
|
||||
import time, random, string, re, copy
|
||||
#from geoip import geolite2
|
||||
|
||||
import config
|
||||
from config import Config
|
||||
|
||||
import mysqlite
|
||||
from misc import _log
|
||||
import rocksock
|
||||
|
||||
config = Config()
|
||||
|
||||
_run_standalone = False
|
||||
|
||||
class WorkerJob():
|
||||
@@ -24,7 +26,7 @@ class WorkerJob():
|
||||
def connect_socket(self):
|
||||
srv = random.choice(config.servers).strip()
|
||||
protos = ['http', 'socks5', 'socks4'] if self.proto is None else [self.proto]
|
||||
server_port = 6697 if config._watchd.use_ssl else 6667
|
||||
server_port = 6697 if config.watchd.use_ssl else 6667
|
||||
|
||||
fail_inc = 1
|
||||
|
||||
@@ -37,12 +39,12 @@ class WorkerJob():
|
||||
]
|
||||
|
||||
try:
|
||||
sock = rocksock.Rocksock(host=srv, port=server_port, ssl=config._watchd.use_ssl, proxies=proxies, timeout=config._watchd.timeout)
|
||||
sock = rocksock.Rocksock(host=srv, port=server_port, ssl=config.watchd.use_ssl, proxies=proxies, timeout=config.watchd.timeout)
|
||||
sock.connect()
|
||||
sock.send('%s\n' % random.choice(['NICK', 'USER', 'JOIN', 'MODE', 'PART', 'INVITE', 'KNOCK', 'WHOIS', 'WHO', 'NOTICE', 'PRIVMSG', 'PING', 'QUIT']))
|
||||
return sock, proto, duration, torhost, srv, 0
|
||||
except rocksock.RocksockException as e:
|
||||
if config._watchd.debug:
|
||||
if config.watchd.debug:
|
||||
_log("proxy failed: %s://%s: %s"%(proto, self.proxy, e.get_errormessage()), 'debug')
|
||||
|
||||
et = e.get_errortype()
|
||||
@@ -183,7 +185,7 @@ class Proxywatchd():
|
||||
while not self.stopped.is_set(): time.sleep(0.1)
|
||||
|
||||
def _prep_db(self):
|
||||
self.mysqlite = mysqlite.mysqlite(config.database, str)
|
||||
self.mysqlite = mysqlite.mysqlite(config.common.database, str)
|
||||
def _close_db(self):
|
||||
if self.mysqlite:
|
||||
self.mysqlite.close()
|
||||
@@ -201,14 +203,14 @@ class Proxywatchd():
|
||||
self.mysqlite.commit()
|
||||
self._close_db()
|
||||
|
||||
self.submit_after = config._watchd.submit_after # number of collected jobs before writing db
|
||||
self.submit_after = config.watchd.submit_after # number of collected jobs before writing db
|
||||
self.jobs = []
|
||||
self.collected = []
|
||||
|
||||
def prepare_jobs(self):
|
||||
self._prep_db()
|
||||
q = 'SELECT proxy,proto,failed,success_count,total_duration FROM proxylist WHERE failed<? and tested<? ORDER BY RANDOM()' # ' LIMIT ?'
|
||||
rows = self.mysqlite.execute(q, (config._watchd.maxfail, time.time())).fetchall()
|
||||
rows = self.mysqlite.execute(q, (config.watchd.max_fail, time.time())).fetchall()
|
||||
for row in rows:
|
||||
job = WorkerJob(row[0], row[1], row[2], row[3], row[4])
|
||||
self.jobs.append(job)
|
||||
@@ -254,7 +256,7 @@ class Proxywatchd():
|
||||
return ret
|
||||
|
||||
def start(self):
|
||||
if config._watchd.threads == 1 and _run_standalone:
|
||||
if config.watchd.threads == 1 and _run_standalone:
|
||||
return self._run()
|
||||
else:
|
||||
return self._run_background()
|
||||
@@ -271,7 +273,7 @@ class Proxywatchd():
|
||||
def _run(self):
|
||||
_log('starting...', 'watchd')
|
||||
|
||||
for i in range(config._watchd.threads):
|
||||
for i in range(config.watchd.threads):
|
||||
threadid = ''.join( [ random.choice(string.letters) for x in range(5) ] )
|
||||
wt = WorkerThread(threadid)
|
||||
if self.in_background:
|
||||
|
||||
Reference in New Issue
Block a user