make use of dbs.insert_urls()
This commit is contained in:
25
ppf.py
25
ppf.py
@@ -15,13 +15,8 @@ config = Config()
|
||||
|
||||
def import_from_file(fn, sqlite):
    """Import URLs from a text file (one URL per line) into the URL database.

    Blank lines are skipped.  Insertion and duplicate-checking are delegated
    to dbs.insert_urls(), which replaces the old per-row
    SELECT/INSERT loop that this function used to carry inline.

    Args:
        fn: path of the text file to read.
        sqlite: legacy parameter kept for caller compatibility; insertion
            now goes through the module-level `urldb` connection via
            dbs.insert_urls() — TODO(review): confirm callers no longer
            rely on the `sqlite` handle being used.
    """
    with open(fn, 'r') as f:
        # Read the file exactly once; the old code re-read an already
        # consumed handle, which silently produced an empty list.
        urls = [u for u in f.read().split('\n') if len(u)]
    # 'import.txt' tags the origin of these URLs in the database.
    dbs.insert_urls(urls, 'import.txt', urldb)
|
||||
|
||||
|
||||
def get_content_type(url):
|
||||
@@ -97,22 +92,8 @@ def extract_urls(html, url):
|
||||
item = '%s://%s%s' % (proto,domain,item)
|
||||
|
||||
if not item in urls: urls.append(item)
|
||||
if len(urls) < 200: continue
|
||||
insert_if_not_exists(urls)
|
||||
urls = []
|
||||
|
||||
if len(urls): insert_if_not_exists(urls)
|
||||
|
||||
def insert_if_not_exists(urls):
    """Insert into the `uris` table every URL in *urls* not already present.

    New rows get added=now, check_time=now-3600 (so they are due for an
    immediate check), error=1, and zeroed counters.  Uses the module-level
    `urldb` sqlite3 connection and commits on success.

    Args:
        urls: list of URL strings; may be empty (no-op).
    """
    # Guard: an empty list would render the query as
    # 'SELECT url FROM uris WHERE ' (no predicate) -> OperationalError.
    if not urls:
        return
    mytime = int(time.time())
    # One parameterized lookup for the whole batch instead of N queries.
    # NOTE(review): very large batches could exceed SQLITE_MAX_VARIABLE_NUMBER;
    # callers currently flush every 200 URLs, which is well under the limit.
    predicate = ' OR '.join('url=?' for _ in urls)
    query = 'SELECT url FROM uris WHERE %s' % predicate
    # Set membership makes the duplicate test O(1) per URL.
    known = { row[0] for row in urldb.execute(query, urls) }
    args = [ [mytime, u, (mytime - 3600), 1, 0, 0, 0] for u in urls if u not in known ]
    if len(args):
        print('new items: %s' % args)
        # OR IGNORE keeps this race-safe if a URL appeared since the SELECT.
        urldb.executemany('INSERT OR IGNORE INTO uris (added,url,check_time,error,stale_count,proxies_added,retrievals) VALUES (?,?,?,?,?,?,?)', args)
        urldb.commit()
|
||||
|
||||
if len(urls): dbs.insert_urls(urls, url, urldb) #insert_if_not_exists(urls)
|
||||
|
||||
def import_proxies_from_file(proxydb, fn):
|
||||
content = open(fn, 'r').read()
|
||||
|
||||
Reference in New Issue
Block a user