don't loop over every searx instance
randomly pick one per search instead
scraper.py (16 lines changed)
@@ -25,14 +25,14 @@ def proxyfind(sqlite = None, urignore=None):
 
     search = '%s -intitle:pdf' % search
     search_args = [ 'category=general', 'time_range=day', 'q=%s' % urllib.quote_plus(search) ]
-    for srx in random.sample(searx_instances,3):
-        urls = []
-        random.shuffle(search_args)
-        search_arg = '&'.join(search_args)
-        for x in range(1,10):
-            content = fetch.fetch_contents('%s/?%s&pageno=%d' % (srx,search_arg,x))
-            if content: urls = fetch.extract_urls(content, urls, urignore)
-        if len(urls): dbs.insert_urls(urls, search_arg, sqlite)
+    searx = random.sample(searx_instances)
+    urls = []
+    random.shuffle(search_args)
+    search_arg = '&'.join(search_args)
+    for x in range(1,10):
+        content = fetch.fetch_contents('%s/?%s&pageno=%d' % (srx,search_arg,x))
+        if content: urls = fetch.extract_urls(content, urls, urignore)
+    if len(urls): dbs.insert_urls(urls, search_arg, sqlite)
 
 
 def load_urignore():
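Two details in the added hunk look off, so what follows is a minimal sketch of what the commit message presumably intends, not the repository's actual code: random.sample(searx_instances) is called without a sample size, which raises TypeError in Python (a single random pick is random.choice()), and the fetch call still references the old loop variable srx rather than the newly assigned searx. The sketch assumes the repo-local fetch and dbs helpers and the module-level searx_instances list behave as the surrounding diff suggests, and it targets Python 2, where urllib.quote_plus lives; the query string and the instance list shown here are placeholders.

import random
import urllib   # Python 2; on Python 3 this would be urllib.parse.quote_plus

import dbs      # repo-local helpers, assumed from the diff context
import fetch

# hypothetical stand-in; the real list is defined elsewhere in scraper.py
searx_instances = ['https://searx.example.org']

def proxyfind(sqlite=None, urignore=None):
    search = 'free proxy'                       # hypothetical; built earlier in the real function
    search = '%s -intitle:pdf' % search
    search_args = ['category=general', 'time_range=day',
                   'q=%s' % urllib.quote_plus(search)]
    # pick one searx instance per search instead of looping over several
    searx = random.choice(searx_instances)
    urls = []
    random.shuffle(search_args)
    search_arg = '&'.join(search_args)
    # walk the first result pages of the chosen instance, collecting URLs
    for x in range(1, 10):
        content = fetch.fetch_contents('%s/?%s&pageno=%d' % (searx, search_arg, x))
        if content:
            urls = fetch.extract_urls(content, urls, urignore)
    if len(urls):
        dbs.insert_urls(urls, search_arg, sqlite)

random.sample(searx_instances, 1)[0] would work too, but random.choice is the idiomatic single pick. Either way, the practical effect of the commit is that each search round hits one instance instead of three, spreading the scraping load across instances over time.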