make use of dbs.insert_urls()

This commit is contained in:
Mickaël Serneels
2019-05-01 23:19:50 +02:00
parent c8d594fb73
commit c241f1a766

25
ppf.py
View File

@@ -15,13 +15,8 @@ config = Config()
# import_from_file(fn, sqlite): read newline-separated URLs from the text file
# `fn` and store them in the `uris` table.
# NOTE(review): this span comes from a rendered commit diff (hunk header
# "-15,13 +15,8") whose +/- markers and indentation were stripped, so what look
# like the pre-change and post-change bodies appear back to back. Confirm the
# real nesting against ppf.py before relying on it.
def import_from_file(fn, sqlite):
with open(fn, 'r') as f:
# Presumably the removed side of the hunk: walk the file content line by line,
# skip empty lines and URLs already present in `uris`, print each new URL,
# insert it with added=now and every other column set to 0, and commit
# through `sqlite`.
for u in f.read().split('\n'):
if not len(u): continue
exists = [ i[0] for i in sqlite.execute('SELECT url FROM uris WHERE url=?',(u,)).fetchall() ]
if exists: continue
print('adding "%s"' % u)
sqlite.execute('INSERT INTO uris (added,url,check_time,error,stale_count,proxies_added,retrievals) VALUES (?,?,?,?,?,?,?)', (int(time.time()),u,0,0,0,0,0))
sqlite.commit()
# Presumably the added side of the hunk: collect every line of the file into a
# list and pass it to dbs.insert_urls() with the literal 'import.txt' as the
# second argument.
# NOTE(review): empty lines are not filtered on this path, and the name `urldb`
# (not a parameter of this function) is passed instead of `sqlite` -- verify
# that dbs.insert_urls() and the callers are fine with both.
urls = [ url for url in f.read().split('\n') ]
dbs.insert_urls(urls, 'import.txt', urldb)
def get_content_type(url):
@@ -97,22 +92,8 @@ def extract_urls(html, url):
item = '%s://%s%s' % (proto,domain,item)
if not item in urls: urls.append(item)
if len(urls) < 200: continue
insert_if_not_exists(urls)
urls = []
if len(urls): insert_if_not_exists(urls)
# insert_if_not_exists(urls): add to the `uris` table every URL in `urls` that
# is not stored there yet, using the `urldb` connection from the enclosing
# scope.
# NOTE(review): indentation was lost in this view, so whether urldb.commit()
# sits inside the `if len(args):` branch cannot be told from here -- confirm
# against the original file.
def insert_if_not_exists(urls):
mytime = int(time.time())
# One 'url=?' placeholder per entry, OR-ed together, so a single SELECT returns
# all URLs that already exist. With an empty `urls` the WHERE clause would be
# empty; the visible call sites only pass a non-empty list.
query = 'SELECT url FROM uris WHERE %s' % ' OR '.join( [ 'url=?' for u in urls ] )
known = [ item[0] for item in urldb.execute(query, urls) ]
# One row per unknown URL, in column order (added, url, check_time, error,
# stale_count, proxies_added, retrievals): check_time is set one hour before
# `added`, error starts at 1 and the remaining counters at 0.
args = [ [mytime, u, (mytime - 3600), 1, 0,0,0] for u in urls if not u in known ]
if len(args):
print('new items: %s' % args)
# INSERT OR IGNORE is used even though known URLs were filtered out above.
urldb.executemany('INSERT OR IGNORE INTO uris (added,url,check_time,error,stale_count,proxies_added,retrievals) VALUES (?,?,?,?,?,?,?)', args)
urldb.commit()
if len(urls): dbs.insert_urls(urls, url, urldb) #insert_if_not_exists(urls)
def import_proxies_from_file(proxydb, fn):
content = open(fn, 'r').read()