ppf: make it possible to import a file containing proxies directly
using --file filename.html
This commit is contained in:
@@ -31,3 +31,5 @@ class Config(ComboParser):
|
|||||||
self.add_item(section, 'perfail_checktime', int, 3600, 'additional checking interval for urls in db in seconds per resultless check', False)
|
self.add_item(section, 'perfail_checktime', int, 3600, 'additional checking interval for urls in db in seconds per resultless check', False)
|
||||||
self.add_item(section, 'max_fail', int, 5, 'number of fails after which an url is considered dead', False)
|
self.add_item(section, 'max_fail', int, 5, 'number of fails after which an url is considered dead', False)
|
||||||
self.add_item(section, 'database', str, 'proxies.sqlite', 'filename of database', True)
|
self.add_item(section, 'database', str, 'proxies.sqlite', 'filename of database', True)
|
||||||
|
|
||||||
|
self.aparser.add_argument("--file", help="import a single file containing proxy addrs", type=str, default='', required=False)
|
||||||
|
|||||||
69
ppf.py
69
ppf.py
@@ -11,6 +11,7 @@ from soup_parser import soupify
|
|||||||
from config import Config
|
from config import Config
|
||||||
from http2 import RsHttp, _parse_url
|
from http2 import RsHttp, _parse_url
|
||||||
import rocksock
|
import rocksock
|
||||||
|
import sys
|
||||||
|
|
||||||
config = Config()
|
config = Config()
|
||||||
|
|
||||||
@@ -145,12 +146,43 @@ def insert_proxies(proxies, sqlite, timestamp):
|
|||||||
sqlite.executemany('INSERT INTO proxylist (added,proxy,failed,tested,success_count,total_duration) VALUES (?,?,?,?,?,?)', new)
|
sqlite.executemany('INSERT INTO proxylist (added,proxy,failed,tested,success_count,total_duration) VALUES (?,?,?,?,?,?)', new)
|
||||||
sqlite.commit()
|
sqlite.commit()
|
||||||
|
|
||||||
_known_proxies = {}
|
def insert_new_proxies(proxydb, new, url):
    """Store the proxies in `new` into `proxydb` in batches and log the total.

    proxydb -- database handle, passed through to insert_proxies()
    new     -- sized iterable of proxy entries to insert
    url     -- origin of the entries; only used in the log message
    """
    # One timestamp is shared by every batch of this call.
    stamp = int(time.time())
    pending = []
    for proxy in new:
        pending.append(proxy)
        if len(pending) < 500:
            continue
        # Flush once a full batch of 500 has been collected.
        insert_proxies(pending, proxydb, stamp)
        pending = []
    # Flush the remainder that did not fill a whole batch.
    if pending:
        insert_proxies(pending, proxydb, stamp)
    _log('+%d item(s) from %s' % (len(new), url), 'added')
|
||||||
|
|
||||||
def proxyleech(proxydb, urldb, url, stale_count, error):
    """Fetch `url`, update its bookkeeping row in `urldb` and store new proxies.

    proxydb     -- database handle handed to insert_new_proxies()
    urldb       -- database handle holding the `uris` table
    url         -- address to fetch and scan for proxies
    stale_count -- current stale counter of the url
    error       -- current error counter of the url
    """
    try:
        content = fetch_contents(url)
    except KeyboardInterrupt as e:
        # Let the user abort the run.
        raise e
    except:
        # Any other failure is treated like an empty response.
        content = ''

    unique_count, new = extract_proxies(content)

    got_content = content != ''
    is_new_site = stale_count == 0 and error == 0

    if is_new_site:
        # Site works but has zero proxy addresses.
        if got_content and unique_count == 0:
            error = 99999
    else:
        if len(new) == 0:
            stale_count += 1
        # An empty fetch bumps the error counter, a non-empty one resets it.
        error = 0 if got_content else error + 1

    checked_at = int(time.time())
    urldb.execute('UPDATE uris SET error=?,stale_count=?,check_time=? where url=?', (error, stale_count, checked_at, url))
    urldb.commit()

    if len(new):
        insert_new_proxies(proxydb, new, url)
|
||||||
|
|
||||||
|
_known_proxies = {}
|
||||||
|
def extract_proxies(content):
|
||||||
matches = re.findall(r'([0-9]+(?:\.[0-9]+){3}:[0-9]{2,5})[\D$]', cleanhtml(content))
|
matches = re.findall(r'([0-9]+(?:\.[0-9]+){3}:[0-9]{2,5})[\D$]', cleanhtml(content))
|
||||||
|
|
||||||
uniques_dict = {}
|
uniques_dict = {}
|
||||||
@@ -173,31 +205,15 @@ def proxyleech(proxydb, urldb, url, stale_count, error):
|
|||||||
new.append(p)
|
new.append(p)
|
||||||
_known_proxies[p] = True
|
_known_proxies[p] = True
|
||||||
|
|
||||||
if stale_count == 0 and error == 0: # new site
|
return len(uniques), new
|
||||||
if content != '' and len(uniques) == 0: # site works but has zero proxy addresses
|
|
||||||
error = 99999
|
|
||||||
else:
|
|
||||||
if len(new) == 0: stale_count += 1
|
|
||||||
if content == '':
|
|
||||||
error += 1
|
|
||||||
else:
|
|
||||||
error = 0
|
|
||||||
|
|
||||||
urldb.execute('UPDATE uris SET error=?,stale_count=?,check_time=? where url=?', (error, stale_count, int(time.time()), url))
|
|
||||||
urldb.commit()
|
|
||||||
|
|
||||||
if not len(new): return
|
|
||||||
|
|
||||||
add = []
|
|
||||||
time_now = int(time.time())
|
|
||||||
for i in new:
|
|
||||||
add.append(i)
|
|
||||||
if len(add) >= 500:
|
|
||||||
insert_proxies(add, proxydb, time_now)
|
|
||||||
add = []
|
|
||||||
if len(add): insert_proxies(add, proxydb, time_now)
|
|
||||||
_log('+%d item(s) from %s' % (len(new), url), 'added')
|
|
||||||
|
|
||||||
|
def import_proxies_from_file(proxydb, fn):
    """Import proxies from the local file `fn` into `proxydb`.

    proxydb -- database handle handed to insert_new_proxies()
    fn      -- path of the file to scan for proxy addresses

    Returns 0 when at least one new proxy was inserted and 1 otherwise;
    the caller passes this value to sys.exit().
    Errors raised by open()/read() propagate to the caller.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(fn, 'r') as handle:
        content = handle.read()

    # Only the list of new proxies is needed here; the unique count is ignored.
    new = extract_proxies(content)[1]
    if len(new):
        insert_new_proxies(proxydb, new, fn)
        return 0
    return 1
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
config.load()
|
config.load()
|
||||||
@@ -209,7 +225,8 @@ if __name__ == '__main__':
|
|||||||
urldb = mysqlite.mysqlite(config.ppf.database, str)
|
urldb = mysqlite.mysqlite(config.ppf.database, str)
|
||||||
dbs.create_table_if_not_exists(urldb, 'uris')
|
dbs.create_table_if_not_exists(urldb, 'uris')
|
||||||
import_from_file('import.txt', urldb)
|
import_from_file('import.txt', urldb)
|
||||||
|
if len(sys.argv) == 3 and sys.argv[1] == "--file":
|
||||||
|
sys.exit(import_proxies_from_file(proxydb, sys.argv[2]))
|
||||||
|
|
||||||
if config.ppf.search:
|
if config.ppf.search:
|
||||||
## load search terms
|
## load search terms
|
||||||
|
|||||||
Reference in New Issue
Block a user