Merge branch 'changes' into 'master'
Changes See merge request mserneels/ppf!1
This commit is contained in:
@@ -4,17 +4,11 @@ database = proxylist.sqlite
|
||||
proxy_max_fail = 5
|
||||
|
||||
[watcherd]
|
||||
enabled = true
|
||||
proxy_file = false
|
||||
checktime = 1800
|
||||
threads = 10
|
||||
timeout = 15
|
||||
read_timeout = 20
|
||||
max_fail = 5
|
||||
|
||||
[proxyfind]
|
||||
enabled = true
|
||||
search = true
|
||||
maxfail = 10
|
||||
timeout = 30
|
||||
threads = 3
|
||||
|
||||
32
config.py
Normal file
32
config.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from ConfigParser import SafeConfigParser
|
||||
|
||||
_loaded = False
|
||||
|
||||
def load():
    """Populate this module's configuration globals; later calls are no-ops.

    Reads ``config.ini`` and ``servers.txt`` from the current working
    directory and parses the process command line, where
    ``--watchd_threads`` overrides the ``[watcherd] threads`` setting.

    Globals set: database, maxfail, search, torhosts, watchd_threads,
    checktime, timeout, read_timeout and servers.

    Raises whatever ConfigParser raises for a missing section/option, and
    IOError if ``servers.txt`` cannot be opened.
    """
    global _loaded
    global database, maxfail, search, torhosts, watchd_threads, checktime, timeout, read_timeout
    global servers

    # More than one module calls load(); only the first call does the work,
    # so the files are read and argv is parsed exactly once.
    if _loaded: return

    ## read the config files
    parser = SafeConfigParser()
    parser.read('config.ini')

    database = parser.get('global', 'database')
    maxfail = parser.getint('global', 'proxy_max_fail')

    search = parser.getboolean('proxyfind', 'search')

    torhosts = [ str(i).strip() for i in parser.get('global', 'tor_host').split(',') ]
    watchd_threads = parser.getint('watcherd', 'threads')
    # checktime and read_timeout are declared global above, so they have to be
    # assigned here or any later config.checktime lookup fails.
    checktime = parser.getint('watcherd', 'checktime')
    timeout = parser.getint('watcherd', 'timeout')
    read_timeout = parser.getint('watcherd', 'read_timeout')

    # allow overriding select items from the commandline
    import argparse
    aparse = argparse.ArgumentParser()
    aparse.add_argument('--watchd_threads', help="how many proxy checker threads to spin up, 0==none, default: 10", type=int, default=watchd_threads, required=False)
    args = aparse.parse_args()

    watchd_threads = args.watchd_threads

    # One server per line. Blank lines (including the empty entry produced by
    # a trailing newline) are dropped so that a random pick is never ''.
    with open('servers.txt', 'r') as handle:
        servers = [ line.strip() for line in handle if line.strip() ]

    _loaded = True
|
||||
37
ppf.py
37
ppf.py
@@ -6,13 +6,13 @@ import random, time
|
||||
import re
|
||||
import urllib
|
||||
import hashlib
|
||||
from ConfigParser import SafeConfigParser
|
||||
from requests.packages.urllib3.exceptions import InsecureRequestWarning
|
||||
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
|
||||
import mysqlite
|
||||
import proxywatchd
|
||||
from misc import _log
|
||||
from soup_parser import soupify
|
||||
import config
|
||||
|
||||
base_header = {
|
||||
'Accept-Language':'en-US,en;q=0.8',
|
||||
@@ -24,7 +24,6 @@ base_header = {
|
||||
|
||||
searx_instances = ('https://searx.me', 'https://searx.xyz', 'https://searx.site', 'https://searx.win', 'https://searx.ru', 'https://stemy.me/searx', 'https://searx.at', 'https://listi.me', 'https://searx.dk', 'https://searx.laquadrature.net' )
|
||||
retry_messages = ('Engines cannot retrieve results', 'Rate limit exceeded')
|
||||
CONFIG = 'config.ini'
|
||||
|
||||
def cleanhtml(raw_html):
|
||||
cleanr = re.compile('<.*?>')
|
||||
@@ -45,6 +44,7 @@ def import_from_file(fn, sqlite):
|
||||
def fetch_contents(uri):
|
||||
headers = base_header
|
||||
try: resp = requests.get(uri, timeout=45, headers=headers, verify=False, proxies=proxies)
|
||||
except KeyboardInterrupt as e: raise e
|
||||
except: return ''
|
||||
data = resp.text
|
||||
|
||||
@@ -69,7 +69,7 @@ def insert_proxies(proxies, uri, sqlite):
|
||||
def proxyfind(sqlite = None):
|
||||
#print('entering proxyfind...')
|
||||
|
||||
if not sqlite: sqlite = mysqlite.mysqlite(database,str)
|
||||
if not sqlite: sqlite = mysqlite.mysqlite(config.database,str)
|
||||
|
||||
uris = [ i[0] for i in sqlite.execute('SELECT url FROM uris WHERE error=0 and url not like "%github%" ORDER BY RANDOM() LIMIT 10').fetchall() ]
|
||||
|
||||
@@ -113,6 +113,7 @@ def proxyleech(sqlite, rows):
|
||||
|
||||
for row in rows:
|
||||
try: content = fetch_contents(row[0])
|
||||
except KeyboardInterrupt as e: raise e
|
||||
except: content = ''
|
||||
|
||||
uniques = []
|
||||
@@ -120,6 +121,7 @@ def proxyleech(sqlite, rows):
|
||||
if p in uniques: continue
|
||||
try:
|
||||
if not is_reserved_ipv4(p.split(':')[0]): uniques.append(p)
|
||||
except KeyboardInterrupt as e: raise e
|
||||
except:
|
||||
pass
|
||||
|
||||
@@ -149,16 +151,10 @@ def proxyleech(sqlite, rows):
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
## read the config files
|
||||
parser = SafeConfigParser()
|
||||
parser.read(CONFIG)
|
||||
config.load()
|
||||
proxies={'http':'socks4://%s' % random.choice(config.torhosts),'https':'socks4://%s' % random.choice(config.torhosts)}
|
||||
|
||||
database = parser.get('global', 'database')
|
||||
search = parser.getboolean('proxyfind', 'search')
|
||||
tor_hosts = parser.get('global', 'tor_host').split(',')
|
||||
proxies={'http':'socks4://%s' % random.choice(tor_hosts),'https':'socks4://%s' % random.choice(tor_hosts)}
|
||||
|
||||
sqlite = mysqlite.mysqlite(database, str)
|
||||
sqlite = mysqlite.mysqlite(config.database, str)
|
||||
|
||||
## create dbs if required
|
||||
sqlite.execute('CREATE TABLE IF NOT EXISTS uris (added INT, url TEXT, check_time INT, error INT, driver INT, hash TEXT)')
|
||||
@@ -178,7 +174,11 @@ if __name__ == '__main__':
|
||||
empty = [ urignore.append(i.split('/')[2]) for i in searx_instances ]
|
||||
|
||||
# start proxy watcher
|
||||
watcherd = proxywatchd.Proxywatchd(CONFIG) if parser.getboolean('watcherd', 'enabled') else None
|
||||
if config.watchd_threads > 0:
|
||||
watcherd = proxywatchd.Proxywatchd()
|
||||
watcherd.start()
|
||||
else:
|
||||
watcherd = None
|
||||
|
||||
while True:
|
||||
try:
|
||||
@@ -186,13 +186,14 @@ if __name__ == '__main__':
|
||||
rows = [ [i[0],i[1],i[2]] for i in sqlite.execute('SELECT url,hash,error FROM uris WHERE (check_time<? AND error<?) ORDER BY RANDOM() LIMIT 25', (time.time(), 10)).fetchall() ]
|
||||
if len(rows): proxyleech(sqlite,rows)
|
||||
## search for new website during free time
|
||||
elif search: proxyfind(sqlite)
|
||||
elif config.search: proxyfind(sqlite)
|
||||
## sleep
|
||||
else: time.sleep(10)
|
||||
|
||||
except KeyboardInterrupt: break
|
||||
except KeyboardInterrupt:
|
||||
if watcherd:
|
||||
watcherd.stop()
|
||||
watcherd.finish()
|
||||
break
|
||||
|
||||
print '\r',
|
||||
|
||||
# stop things
|
||||
if watcherd: watcherd.stop()
|
||||
|
||||
315
proxywatchd.py
315
proxywatchd.py
@@ -1,158 +1,245 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from threading import Thread
|
||||
import threading, commands
|
||||
import socket, time, random, sys, string, re
|
||||
import threading
|
||||
import time, random, string, re, copy
|
||||
import requests
|
||||
#from geoip import geolite2
|
||||
|
||||
from ConfigParser import SafeConfigParser
|
||||
import config
|
||||
|
||||
import mysqlite
|
||||
from misc import _log
|
||||
import rocksock
|
||||
|
||||
class Proxywatchd(Thread):
|
||||
_run_standalone = False
|
||||
|
||||
def stop(self):
|
||||
_log('Requesting proxywatchd to halt (%d thread(s))' % len([item for item in self.threads if item.isAlive()]))
|
||||
self.running = 0
|
||||
|
||||
def __init__(self, config_file):
|
||||
Thread.__init__(self)
|
||||
|
||||
self.threads = []
|
||||
self.running = 1
|
||||
self.parser = SafeConfigParser()
|
||||
self.parser.read(config_file)
|
||||
|
||||
self.maxfail = self.parser.getint('global', 'proxy_max_fail')
|
||||
self.maxthreads = self.parser.getint('watcherd', 'threads')
|
||||
self.checktime = self.parser.getint('watcherd', 'checktime')
|
||||
self.timeout = self.parser.getint('watcherd', 'timeout')
|
||||
self.database = self.parser.get('global', 'database')
|
||||
self.torhosts = [ str(i).strip() for i in self.parser.get('global', 'tor_host').split(',') ]
|
||||
self.read_timeout = self.parser.getint('watcherd', 'read_timeout')
|
||||
|
||||
# create table if needed
|
||||
self.mysqlite = mysqlite.mysqlite(self.database, str)
|
||||
self.mysqlite.execute('CREATE TABLE IF NOT EXISTS proxylist (proxy BLOB, country BLOB, added INT, failed INT, tested INT, source BLOB, dronebl INT, proto TEXT, duration INT)')
|
||||
self.mysqlite.commit()
|
||||
self.echoise = time.time() - 3600;
|
||||
self.ticks = time.time() - 3600;
|
||||
|
||||
with open('servers.txt', 'r') as handle: self.servers = handle.read().split('\n')
|
||||
|
||||
self.start()
|
||||
|
||||
def run(self):
|
||||
_log('Starting proxywatchd..', 'notice')
|
||||
|
||||
threads = []
|
||||
self.mysqlite = mysqlite.mysqlite(self.database, str)
|
||||
|
||||
while self.running:
|
||||
|
||||
if len(threads) < self.maxthreads:
|
||||
t = threading.Thread(target=self.daemon, args=(self.servers,))
|
||||
t.start()
|
||||
threads.append(t)
|
||||
time.sleep( random.choice( xrange(1,3)))
|
||||
|
||||
else: time.sleep(1)
|
||||
|
||||
if (time.time() - self.echoise) >= 180:
|
||||
_log('Proxywatchd threads: %d/%d' % (len(threads), self.maxthreads))
|
||||
self.echoise = time.time()
|
||||
|
||||
self.mysqlite.close()
|
||||
class WorkerJob():
|
||||
def __init__(self, proxy, proto, failcount):
|
||||
self.proxy = proxy
|
||||
self.proto = proto
|
||||
self.failcount = failcount
|
||||
self.nextcheck = None
|
||||
self.duration = None
|
||||
|
||||
def is_drone_bl(self, proxy):
|
||||
p = proxy.split(':')[0]
|
||||
proxies = {'http':'socks4://%s:%s@%s' % (p,p,random.choice(self.torhosts))}
|
||||
proxies = {'http':'socks4://%s:%s@%s' % (p,p,random.choice(config.torhosts))}
|
||||
resp = requests.get('http://dronebl.org/lookup?ip=%s' % p, proxies=proxies)
|
||||
if 'No incidents regarding' in resp.text: return 0
|
||||
else: return 1
|
||||
|
||||
def connect_socket(self, proxy, servers, proto = None):
|
||||
protos = ['http', 'socks5', 'socks4'] if proto is None else proto
|
||||
def connect_socket(self):
|
||||
protos = ['http', 'socks5', 'socks4'] if self.proto is None else self.proto
|
||||
|
||||
for proto in protos:
|
||||
torhost = random.choice(self.torhosts)
|
||||
torhost = random.choice(config.torhosts)
|
||||
duration = time.time()
|
||||
proxies = [ rocksock.RocksockProxyFromURL('socks4://%s' % torhost),
|
||||
rocksock.RocksockProxyFromURL('%s://%s' % (proto, proxy[0])),
|
||||
]
|
||||
proxies = [
|
||||
rocksock.RocksockProxyFromURL('socks4://%s' % torhost),
|
||||
rocksock.RocksockProxyFromURL('%s://%s' % (proto, self.proxy)),
|
||||
]
|
||||
|
||||
srv = random.choice(servers).strip()
|
||||
srv = random.choice(config.servers).strip()
|
||||
try:
|
||||
sock = rocksock.Rocksock(host=srv, port=6697, ssl=True, proxies=proxies, timeout=self.timeout)
|
||||
sock = rocksock.Rocksock(host=srv, port=6697, ssl=True, proxies=proxies, timeout=config.timeout)
|
||||
sock.connect()
|
||||
sock.send('%s\n' % random.choice(['NICK', 'USER', 'JOIN', 'MODE', 'PART', 'INVITE', 'KNOCK', 'WHOIS', 'WHO', 'NOTICE', 'PRIVMSG', 'PING', 'QUIT']))
|
||||
return sock, proto, duration, torhost, srv
|
||||
|
||||
except KeyboardInterrupt as e:
|
||||
raise(e)
|
||||
except: sock.disconnect()
|
||||
|
||||
return None, None, None, None, None
|
||||
|
||||
def daemon(self, servers):
|
||||
sqlite = mysqlite.mysqlite(self.database, str)
|
||||
threadid = ''.join( [ random.choice(string.letters) for x in range(5) ] )
|
||||
def run(self):
|
||||
self.nextcheck = (time.time() + 1800 + ((1+int(self.failcount)) * 3600))
|
||||
|
||||
q = 'SELECT proxy,failed,country,proto FROM proxylist WHERE failed<? and tested<? ORDER BY RANDOM() LIMIT ?'
|
||||
sock, proto, duration, tor, srv = self.connect_socket()
|
||||
if not sock:
|
||||
self.failcount += 1
|
||||
return
|
||||
try:
|
||||
recv = sock.recv(6)
|
||||
|
||||
while self.running:
|
||||
sqlite_requests = []
|
||||
rows = sqlite.execute(q, (self.maxfail, time.time(), random.randint(10,20))).fetchall()
|
||||
if not len(rows):
|
||||
time.sleep(random.randint(10,20))
|
||||
continue
|
||||
# good data
|
||||
if re.match('^(:|ERROR|PING|PONG|NOTICE|\*\*\*)', recv, re.IGNORECASE):
|
||||
duration = (time.time() - duration)
|
||||
self.nextcheck = (time.time() + 1800)
|
||||
|
||||
abc = ' OR proxy='.join( [ '?' for x in xrange(0, len(rows)) ] )
|
||||
args = [ (time.time() + 180) ]
|
||||
e = [ args.append(i[0]) for i in rows ]
|
||||
sqlite.executemany('UPDATE proxylist SET tested=? WHERE proxy=%s' % abc, (args,))
|
||||
sqlite.commit()
|
||||
#match = geolite2.lookup(proxy[0].split(':')[0])
|
||||
match = None
|
||||
if match is not None: match = match.country
|
||||
else: match = 'unknown'
|
||||
|
||||
for proxy in rows:
|
||||
time.sleep(0.1)
|
||||
nextcheck = (time.time() + 1800 + ((1+int(proxy[1])) * 3600))
|
||||
#dronebl = self.is_drone_bl(proxy[0])
|
||||
self.proto = proto
|
||||
self.duation = duration
|
||||
self.failcount = 0
|
||||
_log('%s://%s; c: %s; d: %d sec(s); tor: %s; srv: %s; recv: %s' % (proto, self.proxy, match, duration, tor, srv, recv), 'xxxxx')
|
||||
except KeyboardInterrupt as e:
|
||||
raise e
|
||||
except:
|
||||
self.failcount += 1
|
||||
finally:
|
||||
sock.disconnect()
|
||||
|
||||
sock, proto, duration, tor, srv = self.connect_socket(proxy, servers, proto=proxy[3])
|
||||
if not sock:
|
||||
sqlite_requests.append(((proxy[1]+1), nextcheck, 1, 'unknown', None, 0, proxy[0],))
|
||||
continue
|
||||
|
||||
try:
|
||||
recv = sock.recv(6)
|
||||
class WorkerThread():
    """Runs queued jobs, on its own thread or inline in the caller.

    Jobs are objects with a ``run()`` method. They are handed in with
    add_jobs(), executed by workloop() and handed back by collect().
    When start_thread() has not been called, workloop() runs in the calling
    thread and returns as soon as the queue is empty.
    """

    def __init__ (self, id):
        # ``id`` shadows the builtin; the name is kept for existing callers.
        self.id = id
        self.done = threading.Event()   # set by stop() to end workloop()
        self.thread = None              # created by start_thread()
        self.workqueue = []             # jobs waiting to run
        self.workdone = []              # finished jobs awaiting collect()
        # Guards ``workdone``: it is appended to by workloop() and swapped
        # out by collect(), which may run on different threads.
        self._lock = threading.Lock()

    def stop(self):
        """Ask workloop() to exit after the job it is currently running."""
        self.done.set()

    def term(self):
        """Wait for the worker thread to finish, if one was started."""
        if self.thread: self.thread.join()

    def add_jobs(self, jobs):
        """Append ``jobs`` to the pending queue."""
        self.workqueue.extend(jobs)

    def jobcount(self):
        """Return the number of jobs still waiting in the queue."""
        return len(self.workqueue)

    def collect(self):
        """Return the jobs finished since the previous call and forget them."""
        # Swap under the lock so that a job finishing while we collect ends up
        # either in this batch or in the next one, never in neither.
        with self._lock:
            wd, self.workdone = self.workdone, []
        return wd

    def start_thread(self):
        """Run workloop() on a new thread."""
        self.thread = threading.Thread(target=self.workloop)
        self.thread.start()

    def workloop(self):
        """Run queued jobs until stopped (threaded) or drained (inline)."""
        while True:
            if len(self.workqueue):
                job = self.workqueue.pop()
                job.run()
                with self._lock:
                    self.workdone.append(job)
            elif not self.thread:
                # inline mode: nothing left to do, give control back
                break
            if self.done.is_set(): break
            time.sleep(0.01)
        if self.thread:
            # Interpolate the id into the message; it used to be passed as a
            # second argument, which left a literal '%s' in the log line.
            # NOTE(review): _log is called with one argument elsewhere in this
            # file, so the default level is assumed to be acceptable here.
            _log("thread %s terminated" % self.id)
|
||||
|
||||
# good data
|
||||
if re.match('^(:|ERROR|PING|PONG|NOTICE|\*\*\*)', recv, re.IGNORECASE):
|
||||
duration = (time.time() - duration)
|
||||
nextcheck = (time.time() + 1800)
|
||||
class Proxywatchd():
|
||||
|
||||
#match = geolite2.lookup(proxy[0].split(':')[0])
|
||||
match = None
|
||||
if match is not None: match = match.country
|
||||
else: match = 'unknown'
|
||||
def stop(self):
|
||||
_log('Requesting proxywatchd to halt (%d thread(s))' % len([item for item in self.threads if True]))
|
||||
self.stopping.set()
|
||||
|
||||
#dronebl = self.is_drone_bl(proxy[0])
|
||||
sqlite_requests.append( (0, nextcheck, 1, match, proto, duration, proxy[0],))
|
||||
_log('%s://%s; c: %s; d: %d sec(s); tor: %s; srv: %s; recv: %s' % (proto, proxy[0], match, duration, tor, srv, recv), threadid)
|
||||
def _cleanup(self):
|
||||
for wt in self.threads:
|
||||
wt.stop()
|
||||
for wt in self.threads:
|
||||
wt.term()
|
||||
self.collect_work()
|
||||
self.submit_collected()
|
||||
self.mysqlite.close()
|
||||
self.stopped.set()
|
||||
|
||||
# bad data
|
||||
else:
|
||||
sqlite_requests.append(( (proxy[1]+1), nextcheck, 1, 'unknown', None, 0, proxy[0],))
|
||||
def finish(self):
|
||||
if not self.in_background: self._cleanup()
|
||||
while not self.stopped.is_set(): time.sleep(0.1)
|
||||
|
||||
# also bad
|
||||
except:
|
||||
sqlite_requests.append(( (proxy[1]+1), nextcheck, 1, 'unknown', None, 0, proxy[0],))
|
||||
def __init__(self):
|
||||
config.load()
|
||||
self.in_background = False
|
||||
self.threads = []
|
||||
self.stopping = threading.Event()
|
||||
self.stopped = threading.Event()
|
||||
|
||||
finally:
|
||||
sock.disconnect()
|
||||
# create table if needed
|
||||
self.mysqlite = mysqlite.mysqlite(config.database, str)
|
||||
self.mysqlite.execute('CREATE TABLE IF NOT EXISTS proxylist (proxy BLOB, country BLOB, added INT, failed INT, tested INT, source BLOB, dronebl INT, proto TEXT, duration INT)')
|
||||
self.mysqlite.commit()
|
||||
self.mysqlite.close()
|
||||
self.mysqlite = None
|
||||
|
||||
for r in sqlite_requests:
|
||||
sqlite.execute('UPDATE proxylist SET failed=?,tested=?,dronebl=?,country=?,proto=?,duration=? WHERE proxy=?', r)
|
||||
sqlite.commit()
|
||||
self.submit_after = 200 # number of collected jobs before writing db
|
||||
self.echoise = time.time() - 3600;
|
||||
self.ticks = time.time() - 3600;
|
||||
self.jobs = []
|
||||
self.collected = []
|
||||
|
||||
sqlite.close()
|
||||
def prepare_jobs(self):
    """Queue a WorkerJob for every proxy that is currently due for a check.

    A proxy is due when its ``failed`` count is below ``config.maxfail`` and
    its ``tested`` timestamp lies in the past. Rows come back in random
    order and there is no row limit.
    """
    q = 'SELECT proxy,proto,failed FROM proxylist WHERE failed<? and tested<? ORDER BY RANDOM()'
    now = time.time()
    due = self.mysqlite.execute(q, (config.maxfail, now)).fetchall()
    # columns: 0 = proxy, 1 = proto, 2 = failed
    self.jobs.extend([WorkerJob(row[0], row[1], row[2]) for row in due])
|
||||
|
||||
def collect_work(self):
    """Move every worker's finished jobs into ``self.collected``."""
    finished = self.collected
    for worker in self.threads:
        # collect() hands over the worker's finished jobs and resets its list
        batch = worker.collect()
        finished.extend(batch)
|
||||
|
||||
def submit_collected(self):
    """Write the results of all collected jobs to ``proxylist`` and commit.

    ``dronebl`` is always written as 1 and ``country`` as "unknown".
    ``self.collected`` is emptied afterwards; the commit is issued even
    when there was nothing to write.
    """
    query = 'UPDATE proxylist SET failed=?,tested=?,dronebl=?,country=?,proto=?,duration=? WHERE proxy=?'
    db = self.mysqlite
    for job in self.collected:
        params = (job.failcount, job.nextcheck, 1, "unknown", job.proto, job.duration, job.proxy)
        db.execute(query, params)
    db.commit()
    self.collected = []
|
||||
|
||||
def start(self):
    """Start the watcher and return whatever the chosen runner returns.

    With exactly one configured thread and the module running as a script
    (``_run_standalone``), the main loop runs in the calling thread.
    Otherwise it is started on a background thread.
    """
    foreground = config.watchd_threads == 1 and _run_standalone
    if not foreground:
        return self._run_background()
    return self._run()
|
||||
|
||||
def run(self):
    """Block the calling thread while the watcher runs in the background.

    Returns immediately when the watcher is not in background mode. In
    background mode the loop has no exit condition, so it only ends through
    an exception raised in the calling thread (e.g. KeyboardInterrupt).
    """
    if not self.in_background:
        return
    while True:
        time.sleep(0.1)
|
||||
|
||||
def _run_background(self):
    """Flag background mode and launch the main loop on a new thread."""
    # The flag must be set before _run() starts: _run() reads it to decide
    # whether the workers get threads of their own.
    self.in_background = True
    runner = threading.Thread(target=self._run)
    runner.start()
|
||||
|
||||
def _run(self):
    """Main loop: create the workers, feed them due proxies, store results.

    Runs until ``self.stopping`` is set. In background mode every worker
    gets its own thread and _cleanup() is called from here on shutdown.
    Otherwise the single worker's queue is drained inline on each pass and
    cleanup is left to finish().
    """
    _log('Starting proxywatchd..', 'notice')
    self.mysqlite = mysqlite.mysqlite(config.database, str)

    for i in range(config.watchd_threads):
        threadid = ''.join( [ random.choice(string.letters) for x in range(5) ] )
        wt = WorkerThread(threadid)
        if self.in_background:
            wt.start_thread()
        self.threads.append(wt)

    nworkers = len(self.threads)

    while True:

        if self.stopping.is_set():
            if self.in_background: self._cleanup()
            break

        # Pick up finished jobs first so the idle check below sees them.
        self.collect_work()

        # Refill only when every worker has drained its queue. Re-querying on
        # every pass re-selected proxies that were still queued and piled
        # duplicates onto the workers.
        idle = all(wt.jobcount() == 0 for wt in self.threads)
        if nworkers and not self.jobs and idle:
            # Store results before selecting again: the query in
            # prepare_jobs() filters on ``tested``, which is only updated by
            # submit_collected().
            # NOTE(review): a job that a worker has popped but not finished
            # is not visible here and may be selected once more.
            if self.collected:
                self.submit_collected()
            self.prepare_jobs()

        if self.jobs and nworkers:
            # jobs per thread, rounded up so that no job is left over
            jpt = (len(self.jobs) + nworkers - 1) // nworkers
            for tid in range(nworkers):
                self.threads[tid].add_jobs(self.jobs[tid*jpt:tid*jpt+jpt])
            self.jobs = []

        if not self.in_background and nworkers: # single_thread scenario
            self.threads[0].workloop()

        self.collect_work()

        # Flush early when many results have piled up mid-cycle.
        if len(self.collected) > self.submit_after:
            self.submit_collected()

        time.sleep(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script mode: start() checks this flag to decide whether the watcher
    # may run in the foreground when a single thread is configured.
    _run_standalone = True

    config.load()

    watchd = Proxywatchd()
    try:
        watchd.start()
        # blocks here while the watcher runs in the background
        watchd.run()
    except KeyboardInterrupt as e:
        raise e
    finally:
        # always request shutdown and wait for it, Ctrl-C included
        watchd.stop()
        watchd.finish()
|
||||
|
||||
Reference in New Issue
Block a user