diff --git a/config.ini.sample b/config.ini.sample index bbe7c98..04cb8c2 100644 --- a/config.ini.sample +++ b/config.ini.sample @@ -1,6 +1,5 @@ [common] tor_hosts = 127.0.0.1:9050 -database = proxylist.sqlite [watchd] max_fail = 5 @@ -11,6 +10,7 @@ use_ssl = false debug = false checktime = 3600 perfail_checktime = 3600 +database = proxies.sqlite [ppf] search = true @@ -18,4 +18,4 @@ timeout = 30 http_retries = 1 checktime = 3600 perfail_checktime = 3600 - +database = websites.sqlite diff --git a/config.py b/config.py index 836b4ed..fbb2115 100644 --- a/config.py +++ b/config.py @@ -10,7 +10,6 @@ class Config(ComboParser): super(Config, self).__init__('config.ini') section = 'common' self.add_item(section, 'tor_hosts', str, '127.0.0.1:9050', 'comma-separated list of tor proxy address(es)', True) - self.add_item(section, 'database', str, 'proxylist.sqlite', 'filename of database', True) section = 'watchd' self.add_item(section, 'max_fail', int, 5, 'number of fails after which a proxy is considered dead', False) @@ -21,6 +20,7 @@ class Config(ComboParser): self.add_item(section, 'use_ssl', bool, False, 'whether to use SSL and port 6697 to connect to targets (slower)', False) self.add_item(section, 'checktime', int, 1800, 'base checking interval for proxies in db in seconds', False) self.add_item(section, 'perfail_checktime', int, 3600, 'additional checking interval for proxies in db in seconds per experienced failure', False) + self.add_item(section, 'database', str, 'proxies.sqlite', 'filename of database', True) section = 'ppf' self.add_item(section, 'search', bool, True, 'whether to use searx search engine to find new proxy lists', False) @@ -28,3 +28,4 @@ class Config(ComboParser): self.add_item(section, 'http_retries', int, 1, 'number of retries for http connects', False) self.add_item(section, 'checktime', int, 3600, 'base checking interval for urls in db in seconds', False) self.add_item(section, 'perfail_checktime', int, 3600, 'additional checking interval 
for urls in db in seconds per experienced failure', False) + self.add_item(section, 'database', str, 'websites.sqlite', 'filename of database', True) diff --git a/dbs.py b/dbs.py new file mode 100644 index 0000000..eaa046e --- /dev/null +++ b/dbs.py @@ -0,0 +1,25 @@ +import mysqlite + +def create_table_if_not_exists(sqlite, dbname): + if dbname == 'proxylist': + sqlite.execute("""CREATE TABLE IF NOT EXISTS proxylist ( + proxy BLOB, + country BLOB, + added INT, + failed INT, + tested INT, + dronebl INT, + proto TEXT, + success_count INT, + total_duration INT)""") + + elif dbname == 'uris': + sqlite.execute("""CREATE TABLE IF NOT EXISTS uris ( + added INT, + url TEXT, + check_time INT, + error INT, + stale_count INT, + hash TEXT)""") + + sqlite.commit() diff --git a/ppf.py b/ppf.py index cb6bcf8..a18245a 100755 --- a/ppf.py +++ b/ppf.py @@ -1,5 +1,6 @@ #!/usr/bin/env python +import dbs import random, time import re import urllib @@ -102,7 +103,7 @@ def insert_proxies(proxies, uri, sqlite, timestamp): _log('+%d item(s) from %s' % (len(new), uri), 'added') def proxyfind(sqlite = None): - if not sqlite: sqlite = mysqlite.mysqlite(config.common.database,str) + if not sqlite: sqlite = mysqlite.mysqlite(config.ppf.database,str) uris = [ i[0] for i in sqlite.execute('SELECT url FROM uris WHERE error=0 and url not like "%github%" ORDER BY RANDOM() LIMIT 10').fetchall() ] if len(uris) > 0 and random.random() < random.random(): @@ -153,7 +154,7 @@ def is_usable_proxy(proxy): (A == 172 and B >= 16 and B <= 31): return False return True -def proxyleech(sqlite, rows): +def proxyleech(proxydb, urldb, rows): for row in rows: try: content = fetch_contents(row[0]) except KeyboardInterrupt as e: raise e @@ -181,8 +182,8 @@ def proxyleech(sqlite, rows): ## proxylist was updated: error is zero else: row[2] = 0 - sqlite.execute('UPDATE uris SET error=?,hash=?,check_time=? 
where url=?', (row[2],hash, int(time.time()),row[0])) - sqlite.commit() + urldb.execute('UPDATE uris SET error=?,hash=?,check_time=? where url=?', (row[2],hash, int(time.time()),row[0])) + urldb.commit() if not row[1] or row[2] > 0: return @@ -191,9 +192,9 @@ def proxyleech(sqlite, rows): for i in uniques: add.append(i) if len(add) > 500: - insert_proxies(add, row[0], sqlite, time_now) + insert_proxies(add, row[0], proxydb, time_now) add = [] - if len(add): insert_proxies(add, row[0], sqlite, time_now) + if len(add): insert_proxies(add, row[0], proxydb, time_now) @@ -201,12 +202,13 @@ if __name__ == '__main__': config.load() proxies={'http':'socks4://%s' % random.choice(config.torhosts),'https':'socks4://%s' % random.choice(config.torhosts)} - sqlite = mysqlite.mysqlite(config.common.database, str) - ## create dbs if required - sqlite.execute('CREATE TABLE IF NOT EXISTS uris (added INT, url TEXT, check_time INT, error INT, driver INT, hash TEXT)') - sqlite.execute('CREATE TABLE IF NOT EXISTS proxylist (proxy BLOB, country BLOB, added INT, failed INT, tested INT, dronebl INT, proto TEXT, success_count INT, total_duration INT)') - sqlite.commit() - import_from_file('import.txt', sqlite) + proxydb = mysqlite.mysqlite(config.watchd.database, str) + dbs.create_table_if_not_exists(proxydb, 'proxylist') + + urldb = mysqlite.mysqlite(config.ppf.database, str) + dbs.create_table_if_not_exists(urldb, 'uris') + import_from_file('import.txt', urldb) + if config.ppf.search: ## load search terms @@ -229,11 +231,11 @@ if __name__ == '__main__': while True: try: ## any site that needs to be checked ? - rows = [ [i[0],i[1],i[2]] for i in sqlite.execute('SELECT url,hash,error FROM uris WHERE (check_time+?+(error*?)