dbs: add indexes and optimize batch inserts
This commit is contained in:
27
dbs.py
27
dbs.py
@@ -17,6 +17,10 @@ def create_table_if_not_exists(sqlite, dbname):
|
||||
port INT,
|
||||
consecutive_success INT,
|
||||
total_duration INT)""")
|
||||
# indexes for common query patterns
|
||||
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_failed ON proxylist(failed)')
|
||||
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_tested ON proxylist(tested)')
|
||||
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_proto ON proxylist(proto)')
|
||||
|
||||
elif dbname == 'uris':
|
||||
sqlite.execute("""CREATE TABLE IF NOT EXISTS uris (
|
||||
@@ -29,30 +33,29 @@ def create_table_if_not_exists(sqlite, dbname):
|
||||
proxies_added INT,
|
||||
added INT
|
||||
)""")
|
||||
# indexes for common query patterns
|
||||
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_error ON uris(error)')
|
||||
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_checktime ON uris(check_time)')
|
||||
|
||||
sqlite.commit()
|
||||
|
||||
def insert_proxies(proxydb, proxies, url):
    """Batch-insert scraped proxies into the ``proxylist`` table.

    proxydb -- database handle; only ``executemany`` and ``commit`` are used.
    proxies -- sized collection of ``'ip:port'`` strings. Each entry must
               contain exactly one ':'; otherwise the tuple unpacking below
               raises ValueError and nothing is inserted.
    url     -- where the proxies came from; only used in the log line.

    Returns None. An empty ``proxies`` returns immediately without touching
    the database or the log.
    """
    if not proxies: return
    # One timestamp for the whole batch so all rows share the same `added`.
    timestamp = int(time.time())

    # Column order matches the INSERT below:
    # added, proxy, ip, port, failed, tested, success_count,
    # total_duration, mitm, consecutive_success
    # New rows start with failed=3 and every other counter at 0.
    rows = []
    for p in proxies:
        ip, port = p.split(':')
        rows.append((timestamp, p, ip, port, 3, 0, 0, 0, 0, 0))

    # Single batched statement; OR IGNORE makes SQLite skip rows that would
    # violate a constraint instead of failing the batch (presumably a
    # unique proxy column -- the full schema is not visible here).
    proxydb.executemany('INSERT OR IGNORE INTO proxylist (added,proxy,ip,port,failed,tested,success_count,total_duration,mitm,consecutive_success) VALUES (?,?,?,?,?,?,?,?,?,?)', rows)
    proxydb.commit()

    # Reports the number of proxies submitted, which may be larger than the
    # number of rows actually inserted when OR IGNORE skipped some.
    _log('+%d proxy/ies from %s' % (len(proxies), url), 'added')
|
||||
|
||||
|
||||
def insert_urls(urls, search, sqlite):
    """Batch-insert discovered URLs into the ``uris`` table.

    urls   -- sized collection of URL strings.
    search -- label of the search/source that produced the URLs; only used
              in the log line.
    sqlite -- database handle; only ``executemany`` and ``commit`` are used.

    Returns None. An empty ``urls`` returns immediately, before any database
    access, so no statement is ever built from an empty list.
    """
    # Guard first: with nothing to insert, do not touch the database at all.
    if not urls: return
    time_now = int(time.time())

    # Column order matches the INSERT below:
    # added, url, check_time, error, stale_count, retrievals, proxies_added
    # New rows start with check_time=0, error=1 and the counters at 0.
    rows = [(time_now, u, 0, 1, 0, 0, 0) for u in urls]

    # No separate SELECT for already-known URLs: OR IGNORE makes SQLite skip
    # rows that would violate a constraint (presumably a unique url column --
    # the full schema is not visible here).
    sqlite.executemany('INSERT OR IGNORE INTO uris (added,url,check_time,error,stale_count,retrievals,proxies_added) values(?,?,?,?,?,?,?)', rows)
    sqlite.commit()

    # Reports the number of URLs submitted, which may be larger than the
    # number of rows actually inserted when OR IGNORE skipped some.
    _log('+%d url(s) from %s' % (len(urls), search), 'added')
|
||||
|
||||
|
||||
Reference in New Issue
Block a user