changes
This commit is contained in:
10
ppf.py
10
ppf.py
@@ -98,9 +98,9 @@ def extract_urls(html, url):
|
||||
if not item.startswith('/'): item = '/%s' % item
|
||||
item = '%s://%s%s' % (proto,domain,item)
|
||||
|
||||
if is_bad_url(item, domain=domain, samedomain=config.ppf.extract_samedomain):
|
||||
elif is_bad_url(item, domain=domain, samedomain=config.ppf.extract_samedomain):
|
||||
continue
|
||||
elif not item in urls: urls.append(item)
|
||||
if not item in urls: urls.append(item)
|
||||
|
||||
if len(urls): dbs.insert_urls(urls, url, urldb) #insert_if_not_exists(urls)
|
||||
|
||||
@@ -329,7 +329,12 @@ if __name__ == '__main__':
|
||||
if len(rows) < config.ppf.threads:
|
||||
rows = []
|
||||
else:
|
||||
nao = time.time()
|
||||
_log('handing %d job(s) to %d thread(s)' % ( len(rows), config.ppf.threads ), 'ppf')
|
||||
args = [ (nao, row[0]) for row in rows ]
|
||||
urldb.executemany('UPDATE uris SET check_time=? where url=?', args)
|
||||
urldb.commit()
|
||||
|
||||
|
||||
for thread in threads:
|
||||
if thread.status == 'ok':
|
||||
@@ -342,6 +347,7 @@ if __name__ == '__main__':
|
||||
_known_proxies[p]=1
|
||||
execute = (error, stale_count, int(time.time()), retrievals, proxies_added+len(new), content_type, url)
|
||||
urldb.execute('UPDATE uris SET error=?,stale_count=?,check_time=?,retrievals=?,proxies_added=?,content_type=? where url=?', execute)
|
||||
urldb.commit()
|
||||
if len(new): dbs.insert_proxies(proxydb, new, url)
|
||||
|
||||
threads = [ thread for thread in threads if thread.is_alive() ]
|
||||
|
||||
Reference in New Issue
Block a user