Merge branch 'changes' into 'master'

Changes

See merge request mserneels/ppf!1
This commit is contained in:
mserneels
2019-01-05 17:35:01 +00:00
4 changed files with 252 additions and 138 deletions

View File

@@ -4,17 +4,11 @@ database = proxylist.sqlite
proxy_max_fail = 5
[watcherd]
enabled = true
proxy_file = false
checktime = 1800
threads = 10
timeout = 15
read_timeout = 20
max_fail = 5
[proxyfind]
enabled = true
search = true
maxfail = 10
timeout = 30
threads = 3

32
config.py Normal file
View File

@@ -0,0 +1,32 @@
from ConfigParser import SafeConfigParser

# Module-level settings (database, maxfail, search, torhosts,
# watchd_threads, timeout, servers, ...) are populated by load().
# Import this module and call config.load() before reading any of them.
_loaded = False


def load():
    """Read config.ini and servers.txt and populate module-level settings.

    Idempotent: both ppf.py and proxywatchd call load(), so repeated calls
    must be no-ops -- otherwise argv would be re-parsed and the files
    re-read on every call.
    """
    # BUG FIX: the original checked _loaded but never set it, so the guard
    # never fired and load() did all its work on every call.
    global _loaded
    if _loaded:
        return
    global database, maxfail, search, torhosts, watchd_threads, checktime, timeout, read_timeout

    ## read the config files
    parser = SafeConfigParser()
    parser.read('config.ini')
    database = parser.get('global', 'database')
    maxfail = parser.getint('global', 'proxy_max_fail')
    search = parser.getboolean('proxyfind', 'search')
    torhosts = [ str(i).strip() for i in parser.get('global', 'tor_host').split(',') ]
    watchd_threads = parser.getint('watcherd', 'threads')
    timeout = parser.getint('watcherd', 'timeout')
    # NOTE(review): checktime and read_timeout are declared global above but
    # never assigned here, although config.ini defines both -- confirm
    # whether they should also be read from the [watcherd] section.

    # allow overriding select items from the commandline
    import argparse
    aparse = argparse.ArgumentParser()
    aparse.add_argument('--watchd_threads', help="how many proxy checker threads to spin up, 0==none, default: 10", type=int, default=watchd_threads, required=False)
    args = aparse.parse_args()
    watchd_threads = args.watchd_threads

    # list of IRC servers used by the proxy checker threads
    global servers
    with open('servers.txt', 'r') as handle:
        servers = handle.read().split('\n')

    # Mark as loaded only after everything succeeded, so a failed first
    # attempt can be retried.
    _loaded = True

37
ppf.py
View File

@@ -6,13 +6,13 @@ import random, time
import re
import urllib
import hashlib
from ConfigParser import SafeConfigParser
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
import mysqlite
import proxywatchd
from misc import _log
from soup_parser import soupify
import config
base_header = {
'Accept-Language':'en-US,en;q=0.8',
@@ -24,7 +24,6 @@ base_header = {
searx_instances = ('https://searx.me', 'https://searx.xyz', 'https://searx.site', 'https://searx.win', 'https://searx.ru', 'https://stemy.me/searx', 'https://searx.at', 'https://listi.me', 'https://searx.dk', 'https://searx.laquadrature.net' )
retry_messages = ('Engines cannot retrieve results', 'Rate limit exceeded')
CONFIG = 'config.ini'
def cleanhtml(raw_html):
cleanr = re.compile('<.*?>')
@@ -45,6 +44,7 @@ def import_from_file(fn, sqlite):
def fetch_contents(uri):
headers = base_header
try: resp = requests.get(uri, timeout=45, headers=headers, verify=False, proxies=proxies)
except KeyboardInterrupt as e: raise e
except: return ''
data = resp.text
@@ -69,7 +69,7 @@ def insert_proxies(proxies, uri, sqlite):
def proxyfind(sqlite = None):
#print('entering proxyfind...')
if not sqlite: sqlite = mysqlite.mysqlite(database,str)
if not sqlite: sqlite = mysqlite.mysqlite(config.database,str)
uris = [ i[0] for i in sqlite.execute('SELECT url FROM uris WHERE error=0 and url not like "%github%" ORDER BY RANDOM() LIMIT 10').fetchall() ]
@@ -113,6 +113,7 @@ def proxyleech(sqlite, rows):
for row in rows:
try: content = fetch_contents(row[0])
except KeyboardInterrupt as e: raise e
except: content = ''
uniques = []
@@ -120,6 +121,7 @@ def proxyleech(sqlite, rows):
if p in uniques: continue
try:
if not is_reserved_ipv4(p.split(':')[0]): uniques.append(p)
except KeyboardInterrupt as e: raise e
except:
pass
@@ -149,16 +151,10 @@ def proxyleech(sqlite, rows):
if __name__ == '__main__':
## read the config files
parser = SafeConfigParser()
parser.read(CONFIG)
config.load()
proxies={'http':'socks4://%s' % random.choice(config.torhosts),'https':'socks4://%s' % random.choice(config.torhosts)}
database = parser.get('global', 'database')
search = parser.getboolean('proxyfind', 'search')
tor_hosts = parser.get('global', 'tor_host').split(',')
proxies={'http':'socks4://%s' % random.choice(tor_hosts),'https':'socks4://%s' % random.choice(tor_hosts)}
sqlite = mysqlite.mysqlite(database, str)
sqlite = mysqlite.mysqlite(config.database, str)
## create dbs if required
sqlite.execute('CREATE TABLE IF NOT EXISTS uris (added INT, url TEXT, check_time INT, error INT, driver INT, hash TEXT)')
@@ -178,7 +174,11 @@ if __name__ == '__main__':
empty = [ urignore.append(i.split('/')[2]) for i in searx_instances ]
# start proxy watcher
watcherd = proxywatchd.Proxywatchd(CONFIG) if parser.getboolean('watcherd', 'enabled') else None
if config.watchd_threads > 0:
watcherd = proxywatchd.Proxywatchd()
watcherd.start()
else:
watcherd = None
while True:
try:
@@ -186,13 +186,14 @@ if __name__ == '__main__':
rows = [ [i[0],i[1],i[2]] for i in sqlite.execute('SELECT url,hash,error FROM uris WHERE (check_time<? AND error<?) ORDER BY RANDOM() LIMIT 25', (time.time(), 10)).fetchall() ]
if len(rows): proxyleech(sqlite,rows)
## search for new website during free time
elif search: proxyfind(sqlite)
elif config.search: proxyfind(sqlite)
## sleep
else: time.sleep(10)
except KeyboardInterrupt: break
except KeyboardInterrupt:
if watcherd:
watcherd.stop()
watcherd.finish()
break
print '\r',
# stop things
if watcherd: watcherd.stop()

View File

@@ -1,158 +1,245 @@
#!/usr/bin/env python
from threading import Thread
import threading, commands
import socket, time, random, sys, string, re
import threading
import time, random, string, re, copy
import requests
#from geoip import geolite2
from ConfigParser import SafeConfigParser
import config
import mysqlite
from misc import _log
import rocksock
class Proxywatchd(Thread):
_run_standalone = False
def stop(self):
_log('Requesting proxywatchd to halt (%d thread(s))' % len([item for item in self.threads if item.isAlive()]))
self.running = 0
def __init__(self, config_file):
Thread.__init__(self)
self.threads = []
self.running = 1
self.parser = SafeConfigParser()
self.parser.read(config_file)
self.maxfail = self.parser.getint('global', 'proxy_max_fail')
self.maxthreads = self.parser.getint('watcherd', 'threads')
self.checktime = self.parser.getint('watcherd', 'checktime')
self.timeout = self.parser.getint('watcherd', 'timeout')
self.database = self.parser.get('global', 'database')
self.torhosts = [ str(i).strip() for i in self.parser.get('global', 'tor_host').split(',') ]
self.read_timeout = self.parser.getint('watcherd', 'read_timeout')
# create table if needed
self.mysqlite = mysqlite.mysqlite(self.database, str)
self.mysqlite.execute('CREATE TABLE IF NOT EXISTS proxylist (proxy BLOB, country BLOB, added INT, failed INT, tested INT, source BLOB, dronebl INT, proto TEXT, duration INT)')
self.mysqlite.commit()
self.echoise = time.time() - 3600;
self.ticks = time.time() - 3600;
with open('servers.txt', 'r') as handle: self.servers = handle.read().split('\n')
self.start()
def run(self):
_log('Starting proxywatchd..', 'notice')
threads = []
self.mysqlite = mysqlite.mysqlite(self.database, str)
while self.running:
if len(threads) < self.maxthreads:
t = threading.Thread(target=self.daemon, args=(self.servers,))
t.start()
threads.append(t)
time.sleep( random.choice( xrange(1,3)))
else: time.sleep(1)
if (time.time() - self.echoise) >= 180:
_log('Proxywatchd threads: %d/%d' % (len(threads), self.maxthreads))
self.echoise = time.time()
self.mysqlite.close()
class WorkerJob():
def __init__(self, proxy, proto, failcount):
self.proxy = proxy
self.proto = proto
self.failcount = failcount
self.nextcheck = None
self.duration = None
def is_drone_bl(self, proxy):
p = proxy.split(':')[0]
proxies = {'http':'socks4://%s:%s@%s' % (p,p,random.choice(self.torhosts))}
proxies = {'http':'socks4://%s:%s@%s' % (p,p,random.choice(config.torhosts))}
resp = requests.get('http://dronebl.org/lookup?ip=%s' % p, proxies=proxies)
if 'No incidents regarding' in resp.text: return 0
else: return 1
def connect_socket(self, proxy, servers, proto = None):
protos = ['http', 'socks5', 'socks4'] if proto is None else proto
def connect_socket(self):
protos = ['http', 'socks5', 'socks4'] if self.proto is None else self.proto
for proto in protos:
torhost = random.choice(self.torhosts)
torhost = random.choice(config.torhosts)
duration = time.time()
proxies = [ rocksock.RocksockProxyFromURL('socks4://%s' % torhost),
rocksock.RocksockProxyFromURL('%s://%s' % (proto, proxy[0])),
]
proxies = [
rocksock.RocksockProxyFromURL('socks4://%s' % torhost),
rocksock.RocksockProxyFromURL('%s://%s' % (proto, self.proxy)),
]
srv = random.choice(servers).strip()
srv = random.choice(config.servers).strip()
try:
sock = rocksock.Rocksock(host=srv, port=6697, ssl=True, proxies=proxies, timeout=self.timeout)
sock = rocksock.Rocksock(host=srv, port=6697, ssl=True, proxies=proxies, timeout=config.timeout)
sock.connect()
sock.send('%s\n' % random.choice(['NICK', 'USER', 'JOIN', 'MODE', 'PART', 'INVITE', 'KNOCK', 'WHOIS', 'WHO', 'NOTICE', 'PRIVMSG', 'PING', 'QUIT']))
return sock, proto, duration, torhost, srv
except KeyboardInterrupt as e:
raise(e)
except: sock.disconnect()
return None, None, None, None, None
def daemon(self, servers):
sqlite = mysqlite.mysqlite(self.database, str)
threadid = ''.join( [ random.choice(string.letters) for x in range(5) ] )
def run(self):
self.nextcheck = (time.time() + 1800 + ((1+int(self.failcount)) * 3600))
q = 'SELECT proxy,failed,country,proto FROM proxylist WHERE failed<? and tested<? ORDER BY RANDOM() LIMIT ?'
sock, proto, duration, tor, srv = self.connect_socket()
if not sock:
self.failcount += 1
return
try:
recv = sock.recv(6)
while self.running:
sqlite_requests = []
rows = sqlite.execute(q, (self.maxfail, time.time(), random.randint(10,20))).fetchall()
if not len(rows):
time.sleep(random.randint(10,20))
continue
# good data
if re.match('^(:|ERROR|PING|PONG|NOTICE|\*\*\*)', recv, re.IGNORECASE):
duration = (time.time() - duration)
self.nextcheck = (time.time() + 1800)
abc = ' OR proxy='.join( [ '?' for x in xrange(0, len(rows)) ] )
args = [ (time.time() + 180) ]
e = [ args.append(i[0]) for i in rows ]
sqlite.executemany('UPDATE proxylist SET tested=? WHERE proxy=%s' % abc, (args,))
sqlite.commit()
#match = geolite2.lookup(proxy[0].split(':')[0])
match = None
if match is not None: match = match.country
else: match = 'unknown'
for proxy in rows:
time.sleep(0.1)
nextcheck = (time.time() + 1800 + ((1+int(proxy[1])) * 3600))
#dronebl = self.is_drone_bl(proxy[0])
self.proto = proto
self.duation = duration
self.failcount = 0
_log('%s://%s; c: %s; d: %d sec(s); tor: %s; srv: %s; recv: %s' % (proto, self.proxy, match, duration, tor, srv, recv), 'xxxxx')
except KeyboardInterrupt as e:
raise e
except:
self.failcount += 1
finally:
sock.disconnect()
sock, proto, duration, tor, srv = self.connect_socket(proxy, servers, proto=proxy[3])
if not sock:
sqlite_requests.append(((proxy[1]+1), nextcheck, 1, 'unknown', None, 0, proxy[0],))
continue
try:
recv = sock.recv(6)
class WorkerThread():
    """Drains a queue of job objects, each run via job.run().

    Can be driven two ways: start_thread() runs workloop() on a backing
    thread (background mode), or the owner calls workloop() directly with
    self.thread left as None (synchronous, single-threaded mode).
    """

    def __init__ (self, id):
        # id: short identifier used only for log output
        self.id = id
        # set by stop() to ask workloop() to exit
        self.done = threading.Event()
        # backing threading.Thread, or None when driven synchronously
        self.thread = None
        # jobs waiting to run
        self.workqueue = []
        # finished jobs awaiting collect()
        self.workdone = []

    def stop(self):
        # Request workloop() to exit after its current job.
        self.done.set()

    def term(self):
        # Join the backing thread, if one was started.
        if self.thread: self.thread.join()

    def add_jobs(self, jobs):
        # Queue a batch of jobs for execution.
        self.workqueue.extend(jobs)

    def jobcount(self):
        # Number of jobs still waiting to run.
        return len(self.workqueue)

    def collect(self):
        # Hand back the finished jobs and clear the done-list.  Shallow copy
        # so the caller's list is not aliased to self.workdone.
        wd = copy.copy(self.workdone)
        self.workdone = []
        return wd

    def start_thread(self):
        # Switch to background mode: run workloop() on its own thread.
        self.thread = threading.Thread(target=self.workloop)
        self.thread.start()

    def workloop(self):
        # Main loop: pop jobs (LIFO order) and run them until stopped.
        while True:
            if len(self.workqueue):
                job = self.workqueue.pop()
                job.run()
                self.workdone.append(job)
            elif not self.thread:
                # Synchronous mode: an empty queue ends the loop instead of
                # idling, returning control to the caller.
                break
            if self.done.is_set(): break
            time.sleep(0.01)
        if self.thread:
            # NOTE(review): _log is passed printf-style args here, but other
            # call sites pass a single pre-formatted string -- confirm _log
            # actually applies the '%s' formatting.
            _log("thread %s terminated", self.id)
# good data
if re.match('^(:|ERROR|PING|PONG|NOTICE|\*\*\*)', recv, re.IGNORECASE):
duration = (time.time() - duration)
nextcheck = (time.time() + 1800)
class Proxywatchd():
#match = geolite2.lookup(proxy[0].split(':')[0])
match = None
if match is not None: match = match.country
else: match = 'unknown'
def stop(self):
_log('Requesting proxywatchd to halt (%d thread(s))' % len([item for item in self.threads if True]))
self.stopping.set()
#dronebl = self.is_drone_bl(proxy[0])
sqlite_requests.append( (0, nextcheck, 1, match, proto, duration, proxy[0],))
_log('%s://%s; c: %s; d: %d sec(s); tor: %s; srv: %s; recv: %s' % (proto, proxy[0], match, duration, tor, srv, recv), threadid)
def _cleanup(self):
for wt in self.threads:
wt.stop()
for wt in self.threads:
wt.term()
self.collect_work()
self.submit_collected()
self.mysqlite.close()
self.stopped.set()
# bad data
else:
sqlite_requests.append(( (proxy[1]+1), nextcheck, 1, 'unknown', None, 0, proxy[0],))
def finish(self):
if not self.in_background: self._cleanup()
while not self.stopped.is_set(): time.sleep(0.1)
# also bad
except:
sqlite_requests.append(( (proxy[1]+1), nextcheck, 1, 'unknown', None, 0, proxy[0],))
def __init__(self):
config.load()
self.in_background = False
self.threads = []
self.stopping = threading.Event()
self.stopped = threading.Event()
finally:
sock.disconnect()
# create table if needed
self.mysqlite = mysqlite.mysqlite(config.database, str)
self.mysqlite.execute('CREATE TABLE IF NOT EXISTS proxylist (proxy BLOB, country BLOB, added INT, failed INT, tested INT, source BLOB, dronebl INT, proto TEXT, duration INT)')
self.mysqlite.commit()
self.mysqlite.close()
self.mysqlite = None
for r in sqlite_requests:
sqlite.execute('UPDATE proxylist SET failed=?,tested=?,dronebl=?,country=?,proto=?,duration=? WHERE proxy=?', r)
sqlite.commit()
self.submit_after = 200 # number of collected jobs before writing db
self.echoise = time.time() - 3600;
self.ticks = time.time() - 3600;
self.jobs = []
self.collected = []
sqlite.close()
def prepare_jobs(self):
q = 'SELECT proxy,proto,failed FROM proxylist WHERE failed<? and tested<? ORDER BY RANDOM()' # ' LIMIT ?'
rows = self.mysqlite.execute(q, (config.maxfail, time.time())).fetchall()
for row in rows:
job = WorkerJob(row[0], row[1], row[2])
self.jobs.append(job)
def collect_work(self):
for wt in self.threads:
self.collected.extend(wt.collect())
def submit_collected(self):
query = 'UPDATE proxylist SET failed=?,tested=?,dronebl=?,country=?,proto=?,duration=? WHERE proxy=?'
for job in self.collected:
self.mysqlite.execute(query, (job.failcount, job.nextcheck, 1, "unknown", job.proto, job.duration, job.proxy))
self.mysqlite.commit()
self.collected = []
def start(self):
if config.watchd_threads == 1 and _run_standalone:
return self._run()
else:
return self._run_background()
def run(self):
if self.in_background:
while 1: time.sleep(0.1)
def _run_background(self):
self.in_background = True
t = threading.Thread(target=self._run)
t.start()
def _run(self):
_log('Starting proxywatchd..', 'notice')
self.mysqlite = mysqlite.mysqlite(config.database, str)
for i in range(config.watchd_threads):
threadid = ''.join( [ random.choice(string.letters) for x in range(5) ] )
wt = WorkerThread(threadid)
if self.in_background:
wt.start_thread()
self.threads.append(wt)
while True:
if self.stopping.is_set():
if self.in_background: self._cleanup()
break
if len(self.jobs) == 0:
self.prepare_jobs()
if len(self.jobs):
jpt = len(self.jobs)/config.watchd_threads
if len(self.jobs)/float(config.watchd_threads) - jpt > 0.0: jpt += 1
for tid in range(config.watchd_threads):
self.threads[tid].add_jobs(self.jobs[tid*jpt:tid*jpt+jpt])
self.jobs = []
if not self.in_background: # single_thread scenario
self.threads[0].workloop()
self.collect_work()
if len(self.collected) > self.submit_after:
self.submit_collected()
time.sleep(1)
if __name__ == '__main__':
_run_standalone = True
config.load()
w = Proxywatchd()
try:
w.start()
w.run()
except KeyboardInterrupt as e:
raise e
finally:
w.stop()
w.finish()