split databases

This commit is contained in:
mickael
2019-01-10 22:20:09 +00:00
parent b85cb863ba
commit 4c6a83373f
5 changed files with 47 additions and 19 deletions

View File

@@ -1,6 +1,5 @@
[common]
tor_hosts = 127.0.0.1:9050
database = proxylist.sqlite
[watchd]
max_fail = 5
@@ -11,6 +10,7 @@ use_ssl = false
debug = false
checktime = 3600
perfail_checktime = 3600
database = proxies.sqlite
[ppf]
search = true
@@ -18,4 +18,4 @@ timeout = 30
http_retries = 1
checktime = 3600
perfail_checktime = 3600
database = websites.sqlite

View File

@@ -10,7 +10,6 @@ class Config(ComboParser):
super(Config, self).__init__('config.ini')
section = 'common'
self.add_item(section, 'tor_hosts', str, '127.0.0.1:9050', 'comma-separated list of tor proxy address(es)', True)
self.add_item(section, 'database', str, 'proxylist.sqlite', 'filename of database', True)
section = 'watchd'
self.add_item(section, 'max_fail', int, 5, 'number of fails after which a proxy is considered dead', False)
@@ -21,6 +20,7 @@ class Config(ComboParser):
self.add_item(section, 'use_ssl', bool, False, 'whether to use SSL and port 6697 to connect to targets (slower)', False)
self.add_item(section, 'checktime', int, 1800, 'base checking interval for proxies in db in seconds', False)
self.add_item(section, 'perfail_checktime', int, 3600, 'additional checking interval for proxies in db in seconds per experienced failure', False)
self.add_item(section, 'database', str, 'websites.sqlite', 'filename of database', True)
section = 'ppf'
self.add_item(section, 'search', bool, True, 'whether to use searx search engine to find new proxy lists', False)
@@ -28,3 +28,4 @@ class Config(ComboParser):
self.add_item(section, 'http_retries', int, 1, 'number of retries for http connects', False)
self.add_item(section, 'checktime', int, 3600, 'base checking interval for urls in db in seconds', False)
self.add_item(section, 'perfail_checktime', int, 3600, 'additional checking interval for urls in db in seconds per experienced failure', False)
self.add_item(section, 'database', str, 'proxies.sqlite', 'filename of database', True)

25
dbs.py Normal file
View File

@@ -0,0 +1,25 @@
import mysqlite
def create_table_if_not_exists(sqlite, dbname):
    """Ensure the schema named *dbname* exists on the given connection.

    Two schemas are known: 'proxylist' (one row per proxy with test/fail
    bookkeeping) and 'uris' (proxy-list source URLs with staleness
    tracking). An unknown *dbname* creates nothing; the commit is issued
    in every case.
    """
    ddl_by_name = {
        'proxylist': """CREATE TABLE IF NOT EXISTS proxylist (
            proxy BLOB,
            country BLOB,
            added INT,
            failed INT,
            tested INT,
            dronebl INT,
            proto TEXT,
            success_count INT,
            total_duration INT)""",
        'uris': """CREATE TABLE IF NOT EXISTS uris (
            added INT,
            url TEXT,
            check_time INT,
            error INT,
            stale_count INT,
            hash TEXT)""",
    }
    ddl = ddl_by_name.get(dbname)
    if ddl is not None:
        sqlite.execute(ddl)
    sqlite.commit()

32
ppf.py
View File

@@ -1,5 +1,6 @@
#!/usr/bin/env python
import dbs
import random, time
import re
import urllib
@@ -102,7 +103,7 @@ def insert_proxies(proxies, uri, sqlite, timestamp):
_log('+%d item(s) from %s' % (len(new), uri), 'added')
def proxyfind(sqlite = None):
if not sqlite: sqlite = mysqlite.mysqlite(config.common.database,str)
if not sqlite: sqlite = mysqlite.mysqlite(config.ppf.database,str)
uris = [ i[0] for i in sqlite.execute('SELECT url FROM uris WHERE error=0 and url not like "%github%" ORDER BY RANDOM() LIMIT 10').fetchall() ]
if len(uris) > 0 and random.random() < random.random():
@@ -153,7 +154,7 @@ def is_usable_proxy(proxy):
(A == 172 and B >= 16 and B <= 31): return False
return True
def proxyleech(sqlite, rows):
def proxyleech(proxydb, urldb, rows):
for row in rows:
try: content = fetch_contents(row[0])
except KeyboardInterrupt as e: raise e
@@ -181,8 +182,8 @@ def proxyleech(sqlite, rows):
## proxylist was updated: error is zero
else: row[2] = 0
sqlite.execute('UPDATE uris SET error=?,hash=?,check_time=? where url=?', (row[2],hash, int(time.time()),row[0]))
sqlite.commit()
urldb.execute('UPDATE uris SET error=?,hash=?,check_time=? where url=?', (row[2],hash, int(time.time()),row[0]))
urldb.commit()
if not row[1] or row[2] > 0: return
@@ -191,9 +192,9 @@ def proxyleech(sqlite, rows):
for i in uniques:
add.append(i)
if len(add) > 500:
insert_proxies(add, row[0], sqlite, time_now)
insert_proxies(add, row[0], proxydb, time_now)
add = []
if len(add): insert_proxies(add, row[0], sqlite, time_now)
if len(add): insert_proxies(add, row[0], proxydb, time_now)
@@ -201,12 +202,13 @@ if __name__ == '__main__':
config.load()
proxies={'http':'socks4://%s' % random.choice(config.torhosts),'https':'socks4://%s' % random.choice(config.torhosts)}
sqlite = mysqlite.mysqlite(config.common.database, str)
## create dbs if required
sqlite.execute('CREATE TABLE IF NOT EXISTS uris (added INT, url TEXT, check_time INT, error INT, driver INT, hash TEXT)')
sqlite.execute('CREATE TABLE IF NOT EXISTS proxylist (proxy BLOB, country BLOB, added INT, failed INT, tested INT, dronebl INT, proto TEXT, success_count INT, total_duration INT)')
sqlite.commit()
import_from_file('import.txt', sqlite)
proxydb = mysqlite.mysqlite(config.watchd.database, str)
dbs.create_table_if_not_exists(proxydb, 'proxylist')
urldb = mysqlite.mysqlite(config.ppf.database, str)
dbs.create_table_if_not_exists(urldb, 'uris')
import_from_file('import.txt', urldb)
if config.ppf.search:
## load search terms
@@ -229,11 +231,11 @@ if __name__ == '__main__':
while True:
try:
## any site that needs to be checked ?
rows = [ [i[0],i[1],i[2]] for i in sqlite.execute('SELECT url,hash,error FROM uris WHERE (check_time+?+(error*?) <?) ORDER BY RANDOM() LIMIT 25', (config.ppf.checktime, config.ppf.perfail_checktime, int(time.time()))).fetchall() ]
rows = [ [i[0],i[1],i[2]] for i in urldb.execute('SELECT url,hash,error FROM uris WHERE (check_time+?+(error*?) <?) ORDER BY RANDOM() LIMIT 25', (config.ppf.checktime, config.ppf.perfail_checktime, int(time.time()))).fetchall() ]
if len(rows): proxyleech(sqlite,rows)
if len(rows): proxyleech(proxydb, urldb, rows)
## search for new website during free time
if config.ppf.search: proxyfind(sqlite)
if config.ppf.search: proxyfind(urldb)
## sleep
else: time.sleep(10)

View File

@@ -216,7 +216,7 @@ class Proxywatchd():
while not self.stopped.is_set(): time.sleep(0.1)
def _prep_db(self):
self.mysqlite = mysqlite.mysqlite(config.common.database, str)
self.mysqlite = mysqlite.mysqlite(config.watchd.database, str)
def _close_db(self):
if self.mysqlite:
self.mysqlite.close()