split databases
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
[common]
|
||||
tor_hosts = 127.0.0.1:9050
|
||||
database = proxylist.sqlite
|
||||
|
||||
[watchd]
|
||||
max_fail = 5
|
||||
@@ -11,6 +10,7 @@ use_ssl = false
|
||||
debug = false
|
||||
checktime = 3600
|
||||
perfail_checktime = 3600
|
||||
database = proxies.sqlite
|
||||
|
||||
[ppf]
|
||||
search = true
|
||||
@@ -18,4 +18,4 @@ timeout = 30
|
||||
http_retries = 1
|
||||
checktime = 3600
|
||||
perfail_checktime = 3600
|
||||
|
||||
database = websites.sqlite
|
||||
|
||||
@@ -10,7 +10,6 @@ class Config(ComboParser):
|
||||
super(Config, self).__init__('config.ini')
|
||||
section = 'common'
|
||||
self.add_item(section, 'tor_hosts', str, '127.0.0.1:9050', 'comma-separated list of tor proxy address(es)', True)
|
||||
self.add_item(section, 'database', str, 'proxylist.sqlite', 'filename of database', True)
|
||||
|
||||
section = 'watchd'
|
||||
self.add_item(section, 'max_fail', int, 5, 'number of fails after which a proxy is considered dead', False)
|
||||
@@ -21,6 +20,7 @@ class Config(ComboParser):
|
||||
self.add_item(section, 'use_ssl', bool, False, 'whether to use SSL and port 6697 to connect to targets (slower)', False)
|
||||
self.add_item(section, 'checktime', int, 1800, 'base checking interval for proxies in db in seconds', False)
|
||||
self.add_item(section, 'perfail_checktime', int, 3600, 'additional checking interval for proxies in db in seconds per experienced failure', False)
|
||||
self.add_item(section, 'database', str, 'websites.sqlite', 'filename of database', True)
|
||||
|
||||
section = 'ppf'
|
||||
self.add_item(section, 'search', bool, True, 'whether to use searx search engine to find new proxy lists', False)
|
||||
@@ -28,3 +28,4 @@ class Config(ComboParser):
|
||||
self.add_item(section, 'http_retries', int, 1, 'number of retries for http connects', False)
|
||||
self.add_item(section, 'checktime', int, 3600, 'base checking interval for urls in db in seconds', False)
|
||||
self.add_item(section, 'perfail_checktime', int, 3600, 'additional checking interval for urls in db in seconds per experienced failure', False)
|
||||
self.add_item(section, 'database', str, 'proxies.sqlite', 'filename of database', True)
|
||||
|
||||
25
dbs.py
Normal file
25
dbs.py
Normal file
@@ -0,0 +1,25 @@
|
||||
import mysqlite
|
||||
|
||||
# CREATE statements for every table this module knows how to build,
# keyed by the name callers pass as `dbname`.
_TABLE_SCHEMAS = {
    'proxylist': """CREATE TABLE IF NOT EXISTS proxylist (
        proxy BLOB,
        country BLOB,
        added INT,
        failed INT,
        tested INT,
        dronebl INT,
        proto TEXT,
        success_count INT,
        total_duration INT)""",
    'uris': """CREATE TABLE IF NOT EXISTS uris (
        added INT,
        url TEXT,
        check_time INT,
        error INT,
        stale_count INT,
        hash TEXT)""",
}


def create_table_if_not_exists(sqlite, dbname):
    """Create the table named `dbname` on `sqlite` unless it already exists.

    sqlite -- database handle; only its execute() and commit() methods
              are used here.
    dbname -- which table to create: 'proxylist' or 'uris'.

    Raises ValueError when `dbname` is not one of the known tables.
    Previously an unknown name was silently ignored, which left the
    database without the expected table and deferred the failure to the
    first query against it.
    """
    try:
        statement = _TABLE_SCHEMAS[dbname]
    except KeyError:
        raise ValueError('unknown table name: %r' % (dbname,))

    sqlite.execute(statement)
    sqlite.commit()
|
||||
32
ppf.py
32
ppf.py
@@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import dbs
|
||||
import random, time
|
||||
import re
|
||||
import urllib
|
||||
@@ -102,7 +103,7 @@ def insert_proxies(proxies, uri, sqlite, timestamp):
|
||||
_log('+%d item(s) from %s' % (len(new), uri), 'added')
|
||||
|
||||
def proxyfind(sqlite = None):
|
||||
if not sqlite: sqlite = mysqlite.mysqlite(config.common.database,str)
|
||||
if not sqlite: sqlite = mysqlite.mysqlite(config.ppf.database,str)
|
||||
|
||||
uris = [ i[0] for i in sqlite.execute('SELECT url FROM uris WHERE error=0 and url not like "%github%" ORDER BY RANDOM() LIMIT 10').fetchall() ]
|
||||
if len(uris) > 0 and random.random() < random.random():
|
||||
@@ -153,7 +154,7 @@ def is_usable_proxy(proxy):
|
||||
(A == 172 and B >= 16 and B <= 31): return False
|
||||
return True
|
||||
|
||||
def proxyleech(sqlite, rows):
|
||||
def proxyleech(proxydb, urldb, rows):
|
||||
for row in rows:
|
||||
try: content = fetch_contents(row[0])
|
||||
except KeyboardInterrupt as e: raise e
|
||||
@@ -181,8 +182,8 @@ def proxyleech(sqlite, rows):
|
||||
## proxylist was updated: error is zero
|
||||
else: row[2] = 0
|
||||
|
||||
sqlite.execute('UPDATE uris SET error=?,hash=?,check_time=? where url=?', (row[2],hash, int(time.time()),row[0]))
|
||||
sqlite.commit()
|
||||
urldb.execute('UPDATE uris SET error=?,hash=?,check_time=? where url=?', (row[2],hash, int(time.time()),row[0]))
|
||||
urldb.commit()
|
||||
|
||||
if not row[1] or row[2] > 0: return
|
||||
|
||||
@@ -191,9 +192,9 @@ def proxyleech(sqlite, rows):
|
||||
for i in uniques:
|
||||
add.append(i)
|
||||
if len(add) > 500:
|
||||
insert_proxies(add, row[0], sqlite, time_now)
|
||||
insert_proxies(add, row[0], proxydb, time_now)
|
||||
add = []
|
||||
if len(add): insert_proxies(add, row[0], sqlite, time_now)
|
||||
if len(add): insert_proxies(add, row[0], proxydb, time_now)
|
||||
|
||||
|
||||
|
||||
@@ -201,12 +202,13 @@ if __name__ == '__main__':
|
||||
config.load()
|
||||
proxies={'http':'socks4://%s' % random.choice(config.torhosts),'https':'socks4://%s' % random.choice(config.torhosts)}
|
||||
|
||||
sqlite = mysqlite.mysqlite(config.common.database, str)
|
||||
## create dbs if required
|
||||
sqlite.execute('CREATE TABLE IF NOT EXISTS uris (added INT, url TEXT, check_time INT, error INT, driver INT, hash TEXT)')
|
||||
sqlite.execute('CREATE TABLE IF NOT EXISTS proxylist (proxy BLOB, country BLOB, added INT, failed INT, tested INT, dronebl INT, proto TEXT, success_count INT, total_duration INT)')
|
||||
sqlite.commit()
|
||||
import_from_file('import.txt', sqlite)
|
||||
proxydb = mysqlite.mysqlite(config.watchd.database, str)
|
||||
dbs.create_table_if_not_exists(proxydb, 'proxylist')
|
||||
|
||||
urldb = mysqlite.mysqlite(config.ppf.database, str)
|
||||
dbs.create_table_if_not_exists(urldb, 'uris')
|
||||
import_from_file('import.txt', urldb)
|
||||
|
||||
|
||||
if config.ppf.search:
|
||||
## load search terms
|
||||
@@ -229,11 +231,11 @@ if __name__ == '__main__':
|
||||
while True:
|
||||
try:
|
||||
## any site that needs to be checked ?
|
||||
rows = [ [i[0],i[1],i[2]] for i in sqlite.execute('SELECT url,hash,error FROM uris WHERE (check_time+?+(error*?) <?) ORDER BY RANDOM() LIMIT 25', (config.ppf.checktime, config.ppf.perfail_checktime, int(time.time()))).fetchall() ]
|
||||
rows = [ [i[0],i[1],i[2]] for i in urldb.execute('SELECT url,hash,error FROM uris WHERE (check_time+?+(error*?) <?) ORDER BY RANDOM() LIMIT 25', (config.ppf.checktime, config.ppf.perfail_checktime, int(time.time()))).fetchall() ]
|
||||
|
||||
if len(rows): proxyleech(sqlite,rows)
|
||||
if len(rows): proxyleech(proxydb, urldb, rows)
|
||||
## search for new website during free time
|
||||
if config.ppf.search: proxyfind(sqlite)
|
||||
if config.ppf.search: proxyfind(urldb)
|
||||
## sleep
|
||||
else: time.sleep(10)
|
||||
|
||||
|
||||
@@ -216,7 +216,7 @@ class Proxywatchd():
|
||||
while not self.stopped.is_set(): time.sleep(0.1)
|
||||
|
||||
def _prep_db(self):
|
||||
self.mysqlite = mysqlite.mysqlite(config.common.database, str)
|
||||
self.mysqlite = mysqlite.mysqlite(config.watchd.database, str)
|
||||
def _close_db(self):
|
||||
if self.mysqlite:
|
||||
self.mysqlite.close()
|
||||
|
||||
Reference in New Issue
Block a user