From 90a6756adeae3255499915b5b1fee1daa8f0ceff Mon Sep 17 00:00:00 2001
From: Username
Date: Sat, 20 Dec 2025 18:25:33 +0100
Subject: [PATCH] dbs: add indexes and optimize batch inserts

Create indexes on proxylist(failed), proxylist(tested), proxylist(proto),
uris(error) and uris(check_time) when the tables are set up.

insert_proxies and insert_urls now return early on empty input.

insert_urls no longer pre-selects the already known URLs with one
"url=?" term per URL; every row is handed to INSERT OR IGNORE instead.
The number of rows actually inserted is taken from the change in
SQLite's total_changes() across the insert, so the "added" log line
keeps reporting only new URLs and stays silent when nothing was new.
---
Review notes (ignored by git am):
- Dropping the "known" pre-select makes INSERT OR IGNORE the only
  de-duplication step. That presumably relies on a UNIQUE constraint on
  uris.url, which is outside the context shown in this diff -- confirm.
- The post-image blob id on the index line was not recomputed after the
  insert_urls logging fix; re-export with git format-patch if it matters.

 dbs.py | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/dbs.py b/dbs.py
index 2f0b17d..499ba87 100644
--- a/dbs.py
+++ b/dbs.py
@@ -17,6 +17,10 @@ def create_table_if_not_exists(sqlite, dbname):
 			port INT,
 			consecutive_success INT,
 			total_duration INT)""")
+		# indexes for common query patterns
+		sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_failed ON proxylist(failed)')
+		sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_tested ON proxylist(tested)')
+		sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_proto ON proxylist(proto)')
 
 	elif dbname == 'uris':
 		sqlite.execute("""CREATE TABLE IF NOT EXISTS uris (
@@ -29,30 +33,33 @@ def create_table_if_not_exists(sqlite, dbname):
 			proxies_added INT,
 			added INT
 		)""")
+		# indexes for common query patterns
+		sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_error ON uris(error)')
+		sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_checktime ON uris(check_time)')
 
 	sqlite.commit()
 
 
 def insert_proxies(proxydb, proxies, url):
+	if not proxies: return
 	timestamp = int(time.time())
-
-	new = []
+	rows = []
 	for p in proxies:
 		ip, port = p.split(':')
-		new.append((timestamp,p,ip,port,3,0,0,0,0,0))
-
-	proxydb.executemany('INSERT OR IGNORE INTO proxylist (added,proxy,ip,port,failed,tested,success_count,total_duration,mitm,consecutive_success) VALUES (?,?,?,?,?,?,?,?,?,?)', new)
+		rows.append((timestamp,p,ip,port,3,0,0,0,0,0))
+	proxydb.executemany('INSERT OR IGNORE INTO proxylist (added,proxy,ip,port,failed,tested,success_count,total_duration,mitm,consecutive_success) VALUES (?,?,?,?,?,?,?,?,?,?)', rows)
 	proxydb.commit()
-
 	_log('+%d proxy/ies from %s' % (len(proxies), url), 'added')
 
 
 def insert_urls(urls, search, sqlite):
-	query = [ 'url=?' for u in urls ]
-	known = [ i[0] for i in sqlite.execute('SELECT url FROM uris WHERE %s' % ' OR '.join(query),urls).fetchall() ]
+	if not urls: return
 	time_now = int(time.time())
-	new = [ (time_now,i,0,1,0,0,0) for i in urls if not i in known ]
-	if not len(new): return
-	sqlite.executemany('INSERT OR IGNORE INTO uris (added,url,check_time,error,stale_count,retrievals,proxies_added) values(?,?,?,?,?,?,?)', new)
+	rows = [ (time_now,u,0,1,0,0,0) for u in urls ]
+	# total_changes() is per connection; its delta across the insert is the number of rows really added (ignored duplicates are not counted)
+	before = sqlite.execute('SELECT total_changes()').fetchone()[0]
+	sqlite.executemany('INSERT OR IGNORE INTO uris (added,url,check_time,error,stale_count,retrievals,proxies_added) values(?,?,?,?,?,?,?)', rows)
 	sqlite.commit()
-	_log('+%d url(s) from %s' % (len(new), search), 'added')
+	added = sqlite.execute('SELECT total_changes()').fetchone()[0] - before
+	if not added: return
+	_log('+%d url(s) from %s' % (added, search), 'added')