Merge branch 'changes' into 'master'

Changes

See merge request mserneels/ppf!1
This commit is contained in:
mserneels
2019-01-05 17:35:01 +00:00
4 changed files with 252 additions and 138 deletions

View File

@@ -4,17 +4,11 @@ database = proxylist.sqlite
proxy_max_fail = 5 proxy_max_fail = 5
[watcherd] [watcherd]
enabled = true
proxy_file = false proxy_file = false
checktime = 1800
threads = 10 threads = 10
timeout = 15 timeout = 15
read_timeout = 20
max_fail = 5
[proxyfind] [proxyfind]
enabled = true
search = true search = true
maxfail = 10
timeout = 30 timeout = 30
threads = 3 threads = 3

32
config.py Normal file
View File

@@ -0,0 +1,32 @@
from ConfigParser import SafeConfigParser
_loaded = False
def load():
if _loaded: return
global database, maxfail, search, torhosts, watchd_threads, checktime, timeout, read_timeout
## read the config files
parser = SafeConfigParser()
parser.read('config.ini')
database = parser.get('global', 'database')
maxfail = parser.getint('global', 'proxy_max_fail')
search = parser.getboolean('proxyfind', 'search')
torhosts = [ str(i).strip() for i in parser.get('global', 'tor_host').split(',') ]
watchd_threads = parser.getint('watcherd', 'threads')
timeout = parser.getint('watcherd', 'timeout')
# allow overriding select items from the commandline
import argparse
aparse = argparse.ArgumentParser()
aparse.add_argument('--watchd_threads', help="how many proxy checker threads to spin up, 0==none, default: 10", type=int, default=watchd_threads, required=False)
args = aparse.parse_args()
watchd_threads = args.watchd_threads
global servers
with open('servers.txt', 'r') as handle:
servers = handle.read().split('\n')

37
ppf.py
View File

@@ -6,13 +6,13 @@ import random, time
import re import re
import urllib import urllib
import hashlib import hashlib
from ConfigParser import SafeConfigParser
from requests.packages.urllib3.exceptions import InsecureRequestWarning from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning) requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
import mysqlite import mysqlite
import proxywatchd import proxywatchd
from misc import _log from misc import _log
from soup_parser import soupify from soup_parser import soupify
import config
base_header = { base_header = {
'Accept-Language':'en-US,en;q=0.8', 'Accept-Language':'en-US,en;q=0.8',
@@ -24,7 +24,6 @@ base_header = {
searx_instances = ('https://searx.me', 'https://searx.xyz', 'https://searx.site', 'https://searx.win', 'https://searx.ru', 'https://stemy.me/searx', 'https://searx.at', 'https://listi.me', 'https://searx.dk', 'https://searx.laquadrature.net' ) searx_instances = ('https://searx.me', 'https://searx.xyz', 'https://searx.site', 'https://searx.win', 'https://searx.ru', 'https://stemy.me/searx', 'https://searx.at', 'https://listi.me', 'https://searx.dk', 'https://searx.laquadrature.net' )
retry_messages = ('Engines cannot retrieve results', 'Rate limit exceeded') retry_messages = ('Engines cannot retrieve results', 'Rate limit exceeded')
CONFIG = 'config.ini'
def cleanhtml(raw_html): def cleanhtml(raw_html):
cleanr = re.compile('<.*?>') cleanr = re.compile('<.*?>')
@@ -45,6 +44,7 @@ def import_from_file(fn, sqlite):
def fetch_contents(uri): def fetch_contents(uri):
headers = base_header headers = base_header
try: resp = requests.get(uri, timeout=45, headers=headers, verify=False, proxies=proxies) try: resp = requests.get(uri, timeout=45, headers=headers, verify=False, proxies=proxies)
except KeyboardInterrupt as e: raise e
except: return '' except: return ''
data = resp.text data = resp.text
@@ -69,7 +69,7 @@ def insert_proxies(proxies, uri, sqlite):
def proxyfind(sqlite = None): def proxyfind(sqlite = None):
#print('entering proxyfind...') #print('entering proxyfind...')
if not sqlite: sqlite = mysqlite.mysqlite(database,str) if not sqlite: sqlite = mysqlite.mysqlite(config.database,str)
uris = [ i[0] for i in sqlite.execute('SELECT url FROM uris WHERE error=0 and url not like "%github%" ORDER BY RANDOM() LIMIT 10').fetchall() ] uris = [ i[0] for i in sqlite.execute('SELECT url FROM uris WHERE error=0 and url not like "%github%" ORDER BY RANDOM() LIMIT 10').fetchall() ]
@@ -113,6 +113,7 @@ def proxyleech(sqlite, rows):
for row in rows: for row in rows:
try: content = fetch_contents(row[0]) try: content = fetch_contents(row[0])
except KeyboardInterrupt as e: raise e
except: content = '' except: content = ''
uniques = [] uniques = []
@@ -120,6 +121,7 @@ def proxyleech(sqlite, rows):
if p in uniques: continue if p in uniques: continue
try: try:
if not is_reserved_ipv4(p.split(':')[0]): uniques.append(p) if not is_reserved_ipv4(p.split(':')[0]): uniques.append(p)
except KeyboardInterrupt as e: raise e
except: except:
pass pass
@@ -149,16 +151,10 @@ def proxyleech(sqlite, rows):
if __name__ == '__main__': if __name__ == '__main__':
## read the config files config.load()
parser = SafeConfigParser() proxies={'http':'socks4://%s' % random.choice(config.torhosts),'https':'socks4://%s' % random.choice(config.torhosts)}
parser.read(CONFIG)
database = parser.get('global', 'database') sqlite = mysqlite.mysqlite(config.database, str)
search = parser.getboolean('proxyfind', 'search')
tor_hosts = parser.get('global', 'tor_host').split(',')
proxies={'http':'socks4://%s' % random.choice(tor_hosts),'https':'socks4://%s' % random.choice(tor_hosts)}
sqlite = mysqlite.mysqlite(database, str)
## create dbs if required ## create dbs if required
sqlite.execute('CREATE TABLE IF NOT EXISTS uris (added INT, url TEXT, check_time INT, error INT, driver INT, hash TEXT)') sqlite.execute('CREATE TABLE IF NOT EXISTS uris (added INT, url TEXT, check_time INT, error INT, driver INT, hash TEXT)')
@@ -178,7 +174,11 @@ if __name__ == '__main__':
empty = [ urignore.append(i.split('/')[2]) for i in searx_instances ] empty = [ urignore.append(i.split('/')[2]) for i in searx_instances ]
# start proxy watcher # start proxy watcher
watcherd = proxywatchd.Proxywatchd(CONFIG) if parser.getboolean('watcherd', 'enabled') else None if config.watchd_threads > 0:
watcherd = proxywatchd.Proxywatchd()
watcherd.start()
else:
watcherd = None
while True: while True:
try: try:
@@ -186,13 +186,14 @@ if __name__ == '__main__':
rows = [ [i[0],i[1],i[2]] for i in sqlite.execute('SELECT url,hash,error FROM uris WHERE (check_time<? AND error<?) ORDER BY RANDOM() LIMIT 25', (time.time(), 10)).fetchall() ] rows = [ [i[0],i[1],i[2]] for i in sqlite.execute('SELECT url,hash,error FROM uris WHERE (check_time<? AND error<?) ORDER BY RANDOM() LIMIT 25', (time.time(), 10)).fetchall() ]
if len(rows): proxyleech(sqlite,rows) if len(rows): proxyleech(sqlite,rows)
## search for new website during free time ## search for new website during free time
elif search: proxyfind(sqlite) elif config.search: proxyfind(sqlite)
## sleep ## sleep
else: time.sleep(10) else: time.sleep(10)
except KeyboardInterrupt: break except KeyboardInterrupt:
if watcherd:
watcherd.stop()
watcherd.finish()
break
print '\r', print '\r',
# stop things
if watcherd: watcherd.stop()

View File

@@ -1,158 +1,245 @@
#!/usr/bin/env python #!/usr/bin/env python
from threading import Thread import threading
import threading, commands import time, random, string, re, copy
import socket, time, random, sys, string, re
import requests import requests
#from geoip import geolite2 #from geoip import geolite2
from ConfigParser import SafeConfigParser import config
import mysqlite import mysqlite
from misc import _log from misc import _log
import rocksock import rocksock
class Proxywatchd(Thread): _run_standalone = False
def stop(self): class WorkerJob():
_log('Requesting proxywatchd to halt (%d thread(s))' % len([item for item in self.threads if item.isAlive()])) def __init__(self, proxy, proto, failcount):
self.running = 0 self.proxy = proxy
self.proto = proto
def __init__(self, config_file): self.failcount = failcount
Thread.__init__(self) self.nextcheck = None
self.duration = None
self.threads = []
self.running = 1
self.parser = SafeConfigParser()
self.parser.read(config_file)
self.maxfail = self.parser.getint('global', 'proxy_max_fail')
self.maxthreads = self.parser.getint('watcherd', 'threads')
self.checktime = self.parser.getint('watcherd', 'checktime')
self.timeout = self.parser.getint('watcherd', 'timeout')
self.database = self.parser.get('global', 'database')
self.torhosts = [ str(i).strip() for i in self.parser.get('global', 'tor_host').split(',') ]
self.read_timeout = self.parser.getint('watcherd', 'read_timeout')
# create table if needed
self.mysqlite = mysqlite.mysqlite(self.database, str)
self.mysqlite.execute('CREATE TABLE IF NOT EXISTS proxylist (proxy BLOB, country BLOB, added INT, failed INT, tested INT, source BLOB, dronebl INT, proto TEXT, duration INT)')
self.mysqlite.commit()
self.echoise = time.time() - 3600;
self.ticks = time.time() - 3600;
with open('servers.txt', 'r') as handle: self.servers = handle.read().split('\n')
self.start()
def run(self):
_log('Starting proxywatchd..', 'notice')
threads = []
self.mysqlite = mysqlite.mysqlite(self.database, str)
while self.running:
if len(threads) < self.maxthreads:
t = threading.Thread(target=self.daemon, args=(self.servers,))
t.start()
threads.append(t)
time.sleep( random.choice( xrange(1,3)))
else: time.sleep(1)
if (time.time() - self.echoise) >= 180:
_log('Proxywatchd threads: %d/%d' % (len(threads), self.maxthreads))
self.echoise = time.time()
self.mysqlite.close()
def is_drone_bl(self, proxy): def is_drone_bl(self, proxy):
p = proxy.split(':')[0] p = proxy.split(':')[0]
proxies = {'http':'socks4://%s:%s@%s' % (p,p,random.choice(self.torhosts))} proxies = {'http':'socks4://%s:%s@%s' % (p,p,random.choice(config.torhosts))}
resp = requests.get('http://dronebl.org/lookup?ip=%s' % p, proxies=proxies) resp = requests.get('http://dronebl.org/lookup?ip=%s' % p, proxies=proxies)
if 'No incidents regarding' in resp.text: return 0 if 'No incidents regarding' in resp.text: return 0
else: return 1 else: return 1
def connect_socket(self, proxy, servers, proto = None): def connect_socket(self):
protos = ['http', 'socks5', 'socks4'] if proto is None else proto protos = ['http', 'socks5', 'socks4'] if self.proto is None else self.proto
for proto in protos: for proto in protos:
torhost = random.choice(self.torhosts) torhost = random.choice(config.torhosts)
duration = time.time() duration = time.time()
proxies = [ rocksock.RocksockProxyFromURL('socks4://%s' % torhost), proxies = [
rocksock.RocksockProxyFromURL('%s://%s' % (proto, proxy[0])), rocksock.RocksockProxyFromURL('socks4://%s' % torhost),
] rocksock.RocksockProxyFromURL('%s://%s' % (proto, self.proxy)),
]
srv = random.choice(servers).strip() srv = random.choice(config.servers).strip()
try: try:
sock = rocksock.Rocksock(host=srv, port=6697, ssl=True, proxies=proxies, timeout=self.timeout) sock = rocksock.Rocksock(host=srv, port=6697, ssl=True, proxies=proxies, timeout=config.timeout)
sock.connect() sock.connect()
sock.send('%s\n' % random.choice(['NICK', 'USER', 'JOIN', 'MODE', 'PART', 'INVITE', 'KNOCK', 'WHOIS', 'WHO', 'NOTICE', 'PRIVMSG', 'PING', 'QUIT'])) sock.send('%s\n' % random.choice(['NICK', 'USER', 'JOIN', 'MODE', 'PART', 'INVITE', 'KNOCK', 'WHOIS', 'WHO', 'NOTICE', 'PRIVMSG', 'PING', 'QUIT']))
return sock, proto, duration, torhost, srv return sock, proto, duration, torhost, srv
except KeyboardInterrupt as e:
raise(e)
except: sock.disconnect() except: sock.disconnect()
return None, None, None, None, None return None, None, None, None, None
def daemon(self, servers): def run(self):
sqlite = mysqlite.mysqlite(self.database, str) self.nextcheck = (time.time() + 1800 + ((1+int(self.failcount)) * 3600))
threadid = ''.join( [ random.choice(string.letters) for x in range(5) ] )
q = 'SELECT proxy,failed,country,proto FROM proxylist WHERE failed<? and tested<? ORDER BY RANDOM() LIMIT ?' sock, proto, duration, tor, srv = self.connect_socket()
if not sock:
self.failcount += 1
return
try:
recv = sock.recv(6)
while self.running: # good data
sqlite_requests = [] if re.match('^(:|ERROR|PING|PONG|NOTICE|\*\*\*)', recv, re.IGNORECASE):
rows = sqlite.execute(q, (self.maxfail, time.time(), random.randint(10,20))).fetchall() duration = (time.time() - duration)
if not len(rows): self.nextcheck = (time.time() + 1800)
time.sleep(random.randint(10,20))
continue
abc = ' OR proxy='.join( [ '?' for x in xrange(0, len(rows)) ] ) #match = geolite2.lookup(proxy[0].split(':')[0])
args = [ (time.time() + 180) ] match = None
e = [ args.append(i[0]) for i in rows ] if match is not None: match = match.country
sqlite.executemany('UPDATE proxylist SET tested=? WHERE proxy=%s' % abc, (args,)) else: match = 'unknown'
sqlite.commit()
for proxy in rows: #dronebl = self.is_drone_bl(proxy[0])
time.sleep(0.1) self.proto = proto
nextcheck = (time.time() + 1800 + ((1+int(proxy[1])) * 3600)) self.duation = duration
self.failcount = 0
_log('%s://%s; c: %s; d: %d sec(s); tor: %s; srv: %s; recv: %s' % (proto, self.proxy, match, duration, tor, srv, recv), 'xxxxx')
except KeyboardInterrupt as e:
raise e
except:
self.failcount += 1
finally:
sock.disconnect()
sock, proto, duration, tor, srv = self.connect_socket(proxy, servers, proto=proxy[3])
if not sock:
sqlite_requests.append(((proxy[1]+1), nextcheck, 1, 'unknown', None, 0, proxy[0],))
continue
try: class WorkerThread():
recv = sock.recv(6) def __init__ (self, id):
self.id = id
self.done = threading.Event()
self.thread = None
self.workqueue = []
self.workdone = []
def stop(self):
self.done.set()
def term(self):
if self.thread: self.thread.join()
def add_jobs(self, jobs):
self.workqueue.extend(jobs)
def jobcount(self):
return len(self.workqueue)
def collect(self):
wd = copy.copy(self.workdone)
self.workdone = []
return wd
def start_thread(self):
self.thread = threading.Thread(target=self.workloop)
self.thread.start()
def workloop(self):
while True:
if len(self.workqueue):
job = self.workqueue.pop()
job.run()
self.workdone.append(job)
elif not self.thread:
break
if self.done.is_set(): break
time.sleep(0.01)
if self.thread:
_log("thread %s terminated", self.id)
# good data class Proxywatchd():
if re.match('^(:|ERROR|PING|PONG|NOTICE|\*\*\*)', recv, re.IGNORECASE):
duration = (time.time() - duration)
nextcheck = (time.time() + 1800)
#match = geolite2.lookup(proxy[0].split(':')[0]) def stop(self):
match = None _log('Requesting proxywatchd to halt (%d thread(s))' % len([item for item in self.threads if True]))
if match is not None: match = match.country self.stopping.set()
else: match = 'unknown'
#dronebl = self.is_drone_bl(proxy[0]) def _cleanup(self):
sqlite_requests.append( (0, nextcheck, 1, match, proto, duration, proxy[0],)) for wt in self.threads:
_log('%s://%s; c: %s; d: %d sec(s); tor: %s; srv: %s; recv: %s' % (proto, proxy[0], match, duration, tor, srv, recv), threadid) wt.stop()
for wt in self.threads:
wt.term()
self.collect_work()
self.submit_collected()
self.mysqlite.close()
self.stopped.set()
# bad data def finish(self):
else: if not self.in_background: self._cleanup()
sqlite_requests.append(( (proxy[1]+1), nextcheck, 1, 'unknown', None, 0, proxy[0],)) while not self.stopped.is_set(): time.sleep(0.1)
# also bad def __init__(self):
except: config.load()
sqlite_requests.append(( (proxy[1]+1), nextcheck, 1, 'unknown', None, 0, proxy[0],)) self.in_background = False
self.threads = []
self.stopping = threading.Event()
self.stopped = threading.Event()
finally: # create table if needed
sock.disconnect() self.mysqlite = mysqlite.mysqlite(config.database, str)
self.mysqlite.execute('CREATE TABLE IF NOT EXISTS proxylist (proxy BLOB, country BLOB, added INT, failed INT, tested INT, source BLOB, dronebl INT, proto TEXT, duration INT)')
self.mysqlite.commit()
self.mysqlite.close()
self.mysqlite = None
for r in sqlite_requests: self.submit_after = 200 # number of collected jobs before writing db
sqlite.execute('UPDATE proxylist SET failed=?,tested=?,dronebl=?,country=?,proto=?,duration=? WHERE proxy=?', r) self.echoise = time.time() - 3600;
sqlite.commit() self.ticks = time.time() - 3600;
self.jobs = []
self.collected = []
sqlite.close() def prepare_jobs(self):
q = 'SELECT proxy,proto,failed FROM proxylist WHERE failed<? and tested<? ORDER BY RANDOM()' # ' LIMIT ?'
rows = self.mysqlite.execute(q, (config.maxfail, time.time())).fetchall()
for row in rows:
job = WorkerJob(row[0], row[1], row[2])
self.jobs.append(job)
def collect_work(self):
for wt in self.threads:
self.collected.extend(wt.collect())
def submit_collected(self):
query = 'UPDATE proxylist SET failed=?,tested=?,dronebl=?,country=?,proto=?,duration=? WHERE proxy=?'
for job in self.collected:
self.mysqlite.execute(query, (job.failcount, job.nextcheck, 1, "unknown", job.proto, job.duration, job.proxy))
self.mysqlite.commit()
self.collected = []
def start(self):
if config.watchd_threads == 1 and _run_standalone:
return self._run()
else:
return self._run_background()
def run(self):
if self.in_background:
while 1: time.sleep(0.1)
def _run_background(self):
self.in_background = True
t = threading.Thread(target=self._run)
t.start()
def _run(self):
_log('Starting proxywatchd..', 'notice')
self.mysqlite = mysqlite.mysqlite(config.database, str)
for i in range(config.watchd_threads):
threadid = ''.join( [ random.choice(string.letters) for x in range(5) ] )
wt = WorkerThread(threadid)
if self.in_background:
wt.start_thread()
self.threads.append(wt)
while True:
if self.stopping.is_set():
if self.in_background: self._cleanup()
break
if len(self.jobs) == 0:
self.prepare_jobs()
if len(self.jobs):
jpt = len(self.jobs)/config.watchd_threads
if len(self.jobs)/float(config.watchd_threads) - jpt > 0.0: jpt += 1
for tid in range(config.watchd_threads):
self.threads[tid].add_jobs(self.jobs[tid*jpt:tid*jpt+jpt])
self.jobs = []
if not self.in_background: # single_thread scenario
self.threads[0].workloop()
self.collect_work()
if len(self.collected) > self.submit_after:
self.submit_collected()
time.sleep(1)
if __name__ == '__main__':
_run_standalone = True
config.load()
w = Proxywatchd()
try:
w.start()
w.run()
except KeyboardInterrupt as e:
raise e
finally:
w.stop()
w.finish()