make use of dbs.insert_urls()
This commit is contained in:
25
ppf.py
25
ppf.py
@@ -15,13 +15,8 @@ config = Config()
|
|||||||
|
|
||||||
def import_from_file(fn, sqlite):
|
def import_from_file(fn, sqlite):
|
||||||
with open(fn, 'r') as f:
|
with open(fn, 'r') as f:
|
||||||
for u in f.read().split('\n'):
|
urls = [ url for url in f.read().split('\n') ]
|
||||||
if not len(u): continue
|
dbs.insert_urls(urls, 'import.txt', urldb)
|
||||||
exists = [ i[0] for i in sqlite.execute('SELECT url FROM uris WHERE url=?',(u,)).fetchall() ]
|
|
||||||
if exists: continue
|
|
||||||
print('adding "%s"' % u)
|
|
||||||
sqlite.execute('INSERT INTO uris (added,url,check_time,error,stale_count,proxies_added,retrievals) VALUES (?,?,?,?,?,?,?)', (int(time.time()),u,0,0,0,0,0))
|
|
||||||
sqlite.commit()
|
|
||||||
|
|
||||||
|
|
||||||
def get_content_type(url):
|
def get_content_type(url):
|
||||||
@@ -97,22 +92,8 @@ def extract_urls(html, url):
|
|||||||
item = '%s://%s%s' % (proto,domain,item)
|
item = '%s://%s%s' % (proto,domain,item)
|
||||||
|
|
||||||
if not item in urls: urls.append(item)
|
if not item in urls: urls.append(item)
|
||||||
if len(urls) < 200: continue
|
|
||||||
insert_if_not_exists(urls)
|
|
||||||
urls = []
|
|
||||||
|
|
||||||
if len(urls): insert_if_not_exists(urls)
|
|
||||||
|
|
||||||
def insert_if_not_exists(urls):
|
|
||||||
mytime = int(time.time())
|
|
||||||
query = 'SELECT url FROM uris WHERE %s' % ' OR '.join( [ 'url=?' for u in urls ] )
|
|
||||||
known = [ item[0] for item in urldb.execute(query, urls) ]
|
|
||||||
args = [ [mytime, u, (mytime - 3600), 1, 0,0,0] for u in urls if not u in known ]
|
|
||||||
if len(args):
|
|
||||||
print('new items: %s' % args)
|
|
||||||
urldb.executemany('INSERT OR IGNORE INTO uris (added,url,check_time,error,stale_count,proxies_added,retrievals) VALUES (?,?,?,?,?,?,?)', args)
|
|
||||||
urldb.commit()
|
|
||||||
|
|
||||||
|
if len(urls): dbs.insert_urls(urls, url, urldb) #insert_if_not_exists(urls)
|
||||||
|
|
||||||
def import_proxies_from_file(proxydb, fn):
|
def import_proxies_from_file(proxydb, fn):
|
||||||
content = open(fn, 'r').read()
|
content = open(fn, 'r').read()
|
||||||
|
|||||||
Reference in New Issue
Block a user