dbs: add indexes and optimize batch inserts

This commit is contained in:
Username
2025-12-20 18:25:33 +01:00
parent c054fa3c11
commit 90a6756ade

27
dbs.py
View File

@@ -17,6 +17,10 @@ def create_table_if_not_exists(sqlite, dbname):
port INT, port INT,
consecutive_success INT, consecutive_success INT,
total_duration INT)""") total_duration INT)""")
# indexes for common query patterns
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_failed ON proxylist(failed)')
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_tested ON proxylist(tested)')
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_proto ON proxylist(proto)')
elif dbname == 'uris': elif dbname == 'uris':
sqlite.execute("""CREATE TABLE IF NOT EXISTS uris ( sqlite.execute("""CREATE TABLE IF NOT EXISTS uris (
@@ -29,30 +33,29 @@ def create_table_if_not_exists(sqlite, dbname):
proxies_added INT, proxies_added INT,
added INT added INT
)""") )""")
# indexes for common query patterns
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_error ON uris(error)')
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_checktime ON uris(check_time)')
sqlite.commit() sqlite.commit()
def insert_proxies(proxydb, proxies, url):
    """Insert scraped proxies into the ``proxylist`` table in one batch.

    proxydb -- database handle providing ``execute``, ``executemany`` and
               ``commit``.
    proxies -- collection of ``'host:port'`` strings.
    url     -- where the proxies came from; only used in the log message.

    Every proxy is written with the current time as ``added``,
    ``failed`` = 3 and all remaining counters set to 0.  Rows that
    conflict with an existing one are skipped by ``INSERT OR IGNORE``
    (presumably via a uniqueness constraint on ``proxy`` -- the table
    definition is not fully visible here, confirm).

    Returns None.  Nothing is logged when no row was actually inserted.
    Raises ValueError if an entry contains no ``':'`` separator.
    """
    if not proxies:
        return

    timestamp = int(time.time())
    rows = []
    for p in proxies:
        # Split on the last colon only, so a host part that itself contains
        # colons (e.g. a bracketed IPv6 literal) still yields host + port.
        ip, port = p.rsplit(':', 1)
        rows.append((timestamp, p, ip, port, 3, 0, 0, 0, 0, 0))

    # INSERT OR IGNORE drops duplicates silently, so len(proxies) would
    # overstate what was added.  total_changes() is a per-connection counter
    # of modified rows; its delta is the number of rows really inserted.
    # NOTE(review): assumes proxydb.execute() returns a cursor-like object
    # with fetchall() -- confirm against the handle class.
    before = proxydb.execute('SELECT total_changes()').fetchall()[0][0]
    proxydb.executemany('INSERT OR IGNORE INTO proxylist (added,proxy,ip,port,failed,tested,success_count,total_duration,mitm,consecutive_success) VALUES (?,?,?,?,?,?,?,?,?,?)', rows)
    added = proxydb.execute('SELECT total_changes()').fetchall()[0][0] - before
    proxydb.commit()

    if not added:
        return
    _log('+%d proxy/ies from %s' % (added, url), 'added')
def insert_urls(urls, search, sqlite):
    """Insert newly found URLs into the ``uris`` table in one batch.

    urls   -- collection of URL strings.
    search -- where the URLs came from; only used in the log message.
    sqlite -- database handle providing ``execute``, ``executemany`` and
              ``commit``.

    Every URL is written with the current time as ``added``, ``error`` = 1
    and ``check_time``, ``stale_count``, ``retrievals`` and
    ``proxies_added`` set to 0.  URLs that conflict with an existing row
    are skipped by ``INSERT OR IGNORE`` (presumably via a uniqueness
    constraint on ``url`` -- the table definition is not fully visible
    here, confirm).

    Returns None.  Nothing is logged when no row was actually inserted.
    """
    if not urls:
        return

    time_now = int(time.time())
    rows = [(time_now, u, 0, 1, 0, 0, 0) for u in urls]

    # INSERT OR IGNORE drops already-known URLs silently, so len(urls) would
    # overstate what was added.  total_changes() is a per-connection counter
    # of modified rows; its delta is the number of rows really inserted,
    # without the per-URL lookup query this function used to need.
    before = sqlite.execute('SELECT total_changes()').fetchall()[0][0]
    sqlite.executemany('INSERT OR IGNORE INTO uris (added,url,check_time,error,stale_count,retrievals,proxies_added) values(?,?,?,?,?,?,?)', rows)
    added = sqlite.execute('SELECT total_changes()').fetchall()[0][0] - before
    sqlite.commit()

    if not added:
        return
    _log('+%d url(s) from %s' % (added, search), 'added')