don't loop over every searx instance
randomly pick one per search instead
This commit is contained in:
16
scraper.py
16
scraper.py
@@ -25,14 +25,14 @@ def proxyfind(sqlite = None, urignore=None):
|
||||
|
||||
search = '%s -intitle:pdf' % search
|
||||
search_args = [ 'category=general', 'time_range=day', 'q=%s' % urllib.quote_plus(search) ]
|
||||
for srx in random.sample(searx_instances,3):
|
||||
urls = []
|
||||
random.shuffle(search_args)
|
||||
search_arg = '&'.join(search_args)
|
||||
for x in range(1,10):
|
||||
content = fetch.fetch_contents('%s/?%s&pageno=%d' % (srx,search_arg,x))
|
||||
if content: urls = fetch.extract_urls(content, urls, urignore)
|
||||
if len(urls): dbs.insert_urls(urls, search_arg, sqlite)
|
||||
searx = random.sample(searx_instances)
|
||||
urls = []
|
||||
random.shuffle(search_args)
|
||||
search_arg = '&'.join(search_args)
|
||||
for x in range(1,10):
|
||||
content = fetch.fetch_contents('%s/?%s&pageno=%d' % (srx,search_arg,x))
|
||||
if content: urls = fetch.extract_urls(content, urls, urignore)
|
||||
if len(urls): dbs.insert_urls(urls, search_arg, sqlite)
|
||||
|
||||
|
||||
def load_urignore():
|
||||
|
||||
Reference in New Issue
Block a user