more random changes

Your Name
2021-02-06 11:00:25 +01:00
parent e15b9d2994
commit 9aa2c91f41
3 changed files with 76 additions and 67 deletions


@@ -83,10 +83,7 @@ class WorkerJob():
             ]
             try:
-                if self.isoldies:
-                    sock = rocksock.Rocksock(host=srv, port=server_port, ssl=use_ssl, proxies=proxies, timeout=config.watchd.timeout - 2)
-                else:
-                    sock = rocksock.Rocksock(host=srv, port=server_port, ssl=use_ssl, proxies=proxies, timeout=config.watchd.timeout)
+                sock = rocksock.Rocksock(host=srv, port=server_port, ssl=use_ssl, proxies=proxies, timeout=config.watchd.timeout)
                 sock.connect()
                 sock.send('NICK\n')
                 return sock, proto, duration, torhost, srvname, 0
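The probe this hunk simplifies boils down to the four rocksock calls that appear verbatim in the diff: construct, connect, send a one-line payload, return the socket. A condensed sketch (the wrapper function, its defaults, and the bare `except Exception` are illustrative assumptions, not code from the commit):

import rocksock

# Condensed sketch of the WorkerJob probe; the constructor kwargs, connect()
# and send() are exactly the calls shown in the hunk above.
def probe(srv, server_port, use_ssl=False, proxies=None, timeout=10):
    try:
        sock = rocksock.Rocksock(host=srv, port=server_port, ssl=use_ssl,
                                 proxies=proxies, timeout=timeout)
        sock.connect()
        sock.send('NICK\n')  # minimal liveness check, as in the original
        return sock
    except Exception:
        return None  # hypothetical: caller would count this as a failure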
@@ -276,11 +273,13 @@ class Proxywatchd():
         q = 'SELECT proxy,proto,failed,success_count,total_duration,country FROM proxylist WHERE failed >= ? and failed < ? and (tested + ? + (failed * ?)) < ? ORDER BY RANDOM()'
         rows = self.mysqlite.execute(q, (0, config.watchd.max_fail, config.watchd.checktime, config.watchd.perfail_checktime, time.time())).fetchall()
         # check oldies ?
-        if len(rows) < config.watchd.threads and config.watchd.oldies:
-            self.isoldies = True
-            ## disable tor safeguard for old proxies
-            if self.tor_safeguard: self.tor_safeguard = False
-            rows = self.mysqlite.execute(q, (config.watchd.max_fail, config.watchd.max_fail*2, config.watchd.checktime, config.watchd.oldies_checktime, time.time())).fetchall()
+        if len(rows) < config.watchd.threads:
+            rows = []
+            if config.watchd.oldies:
+                self.isoldies = True
+                ## disable tor safeguard for old proxies
+                if self.tor_safeguard: self.tor_safeguard = False
+                rows = self.mysqlite.execute(q, (config.watchd.max_fail, config.watchd.max_fail*2, config.watchd.checktime, config.watchd.oldies_checktime, time.time())).fetchall()
         return rows

     def prepare_jobs(self):
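The SELECT feeding this hunk encodes the re-check schedule: a proxy is due once `tested + checktime + failed * perfail_checktime` falls behind the current time, so every recorded failure adds a fixed extra delay (a linear backoff), and the oldies pass re-runs the same query with the failure window shifted to [max_fail, max_fail*2) and `oldies_checktime` as the per-failure step. A minimal sketch of the predicate, with hypothetical stand-ins for the `config.watchd.*` values:

import time

# Hypothetical stand-ins for config.watchd.checktime / perfail_checktime.
CHECKTIME = 3600           # base seconds between two checks of the same proxy
PERFAIL_CHECKTIME = 1800   # extra seconds of delay per recorded failure

def is_due(tested, failed, now=None):
    # Mirrors the SQL predicate: (tested + ? + (failed * ?)) < ?
    if now is None:
        now = time.time()
    return tested + CHECKTIME + failed * PERFAIL_CHECKTIME < now

now = time.time()
print(is_due(tested=now - 2 * 3600, failed=0, now=now))  # True: base interval elapsed
print(is_due(tested=now - 2 * 3600, failed=3, now=now))  # False: 3 failures push the due time out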


@@ -13,78 +13,82 @@ import sys
 config = Config()

 with open('searx.instances') as h:
     searx_instances = [ line.strip() for line in h.readlines() if line.lower().startswith('http') ]
 print(searx_instances)

 def proxyfind(sqlite = None, urignore=None):
     search = ''
     random.shuffle(searx_instances)
+
+    ## search by working proxy
     if 'p' in config.scraper.query:
         proxydb = mysqlite.mysqlite(config.watchd.database,str)
         proxies = [ i[0] for i in proxydb.execute('SELECT proxy FROM proxylist WHERE failed=0 ORDER BY RANDOM() LIMIT 10').fetchall() ]
         if len(proxies) and random.random() < random.random():
             search = ' '.join( random.sample(proxies, random.randint(1,2)))
+    ## search by relative url
     if 'w' in config.scraper.query and not len(search) or random.random() < random.random():
         if not sqlite: sqlite = mysqlite.mysqlite(config.ppf.database,str)
         uris = [ i[0] for i in sqlite.execute('SELECT url FROM uris WHERE error=0 and url not like "%github%" ORDER BY RANDOM() LIMIT 10').fetchall() ]
         if len(uris) > 0 and random.random() < random.random():
             if len(search): search = '%s OR ' % search
             search = search + 'site:%s' % random.choice(uris).split('/')[2]
+    ## build string
     if 's' in config.scraper.query and not len(search) or random.random() < random.random():
         if len(search): search = '%s OR ' % search
         search = search + random.choice(search_terms)
     if not len(search): return
     search_args = [ 'category=general', 'time_range=%s' % random.choice(['day','week','month','year']), 'q=%s' % urllib.quote_plus(search) ]
     random.shuffle(search_args)
     search_arg = '&'.join(search_args)
     if config.scraper.debug:
         print('search_arg: %s' % search_arg)

     for srx in searx_instances:
         x = 0
         while 1:
             urls = []
             if x > 0: content = fetch.fetch_contents('%s/?%s&pageno=%d' % (srx,search_arg,x))
             else: content = fetch.fetch_contents('%s/?%s' % (srx,search_arg))
             if content: urls = fetch.extract_urls(content, urls, urignore)
             if not len(urls): break
             dbs.insert_urls(urls, '%s/?%s (pageno: %d)' % (srx.split('/')[2],search_arg,x) , sqlite)
             x = x + 1

 def load_urignore():
     ## load bad terms
     with open('urignore.txt', 'r') as f:
         urignore = [ i.strip() for i in f.read().split('\n') if len(i.strip()) ]
     ## add searx instances as bad terms (avoid loops)
     for i in searx_instances:
         urignore.append(i.split('/')[2])
     return urignore

 if __name__ == '__main__':
     config.load()
     fetch.set_config(config)
     proxydb = mysqlite.mysqlite(config.watchd.database, str)
     dbs.create_table_if_not_exists(proxydb, 'proxylist')
     urldb = mysqlite.mysqlite(config.ppf.database, str)
     dbs.create_table_if_not_exists(urldb, 'uris')
     ## load search terms
     with open('search_terms.txt', 'r') as f:
         search_terms = [ i.strip() for i in f.read().split('\n') if len(i.strip()) ]
     urignore = load_urignore()
     while True:
         try: proxyfind(urldb, urignore)
         except KeyboardInterrupt: break
         print '\r',
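One detail worth noting in proxyfind() above: the recurring guard `random.random() < random.random()` compares two independent uniform draws, which succeeds with probability exactly 1/2 (ties have probability zero), so each guard is a fair coin flip written the long way. A quick empirical check:

import random

# P(U1 < U2) = 1/2 for independent uniforms U1, U2 on [0, 1).
trials = 100000
hits = sum(random.random() < random.random() for _ in range(trials))
print(hits / float(trials))  # prints roughly 0.5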


@@ -1,6 +1,8 @@
-site:github.com :8081 :8888 :8080
-site:github.com :4444 :1234 :3124
-site:github.com proxylist
+elite proxylist
+elite http proxies
+elite socks proxies
+anonymous proxies
+anonymous proxylist
 hourly http proxy
 hourly socks proxy
 daily http proxy
@@ -11,3 +13,7 @@ updated http proxy list
 updated socks proxy list
 download http proxy
 download socks proxy
+доверенное лицо
+свежий список прокси
+http прокси
+socks прокси
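Each of these lines is a standalone query (the added Russian entries read, roughly, 'trusted person (proxy)', 'fresh proxy list', 'http proxy', 'socks proxy') that proxyfind() URL-encodes into the `q=` parameter. A minimal sketch of that construction, using Python 3's `urllib.parse` in place of the script's Python 2 `urllib.quote_plus`; the instance URL and the inline term list are hypothetical stand-ins for `searx.instances` and `search_terms.txt`:

import random
import urllib.parse

# Hypothetical stand-ins for what proxyfind() loads from disk.
search_terms = ['elite proxylist', 'anonymous proxies', 'свежий список прокси']
srx = 'https://searx.example.org'

# Same construction as proxyfind(): shuffled query parameters, quoted term.
search_args = ['category=general',
               'time_range=%s' % random.choice(['day', 'week', 'month', 'year']),
               'q=%s' % urllib.parse.quote_plus(random.choice(search_terms))]
random.shuffle(search_args)
print('%s/?%s' % (srx, '&'.join(search_args)))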