diff --git a/config.py b/config.py
index dd5d688..862a377 100644
--- a/config.py
+++ b/config.py
@@ -31,3 +31,5 @@ class Config(ComboParser):
 		self.add_item(section, 'perfail_checktime', int, 3600, 'additional checking interval for urls in db in seconds per resultless check', False)
 		self.add_item(section, 'max_fail', int, 5, 'number of fails after which an url is considered dead', False)
 		self.add_item(section, 'database', str, 'proxies.sqlite', 'filename of database', True)
+
+		self.aparser.add_argument("--file", help="import a single file containing proxy addrs", type=str, default='', required=False)
diff --git a/ppf.py b/ppf.py
index bdd6fd1..55f0eb3 100755
--- a/ppf.py
+++ b/ppf.py
@@ -11,6 +11,7 @@ from soup_parser import soupify
 from config import Config
 from http2 import RsHttp, _parse_url
 import rocksock
+import sys
 
 config = Config()
 
@@ -145,12 +146,43 @@ def insert_proxies(proxies, sqlite, timestamp):
 	sqlite.executemany('INSERT INTO proxylist (added,proxy,failed,tested,success_count,total_duration) VALUES (?,?,?,?,?,?)', new)
 	sqlite.commit()
 
-_known_proxies = {}
+def insert_new_proxies(proxydb, new, url):
+	add = []
+	time_now = int(time.time())
+	for i in new:
+		add.append(i)
+		if len(add) >= 500:
+			insert_proxies(add, proxydb, time_now)
+			add = []
+	if len(add): insert_proxies(add, proxydb, time_now)
+	_log('+%d item(s) from %s' % (len(new), url), 'added')
+
 def proxyleech(proxydb, urldb, url, stale_count, error):
 	try: content = fetch_contents(url)
 	except KeyboardInterrupt as e: raise e
 	except: content = ''
 
+	unique_count, new = extract_proxies(content)
+
+	if stale_count == 0 and error == 0: # new site
+		if content != '' and unique_count == 0: # site works but has zero proxy addresses
+			error = 99999
+	else:
+		if len(new) == 0: stale_count += 1
+		if content == '':
+			error += 1
+		else:
+			error = 0
+
+	urldb.execute('UPDATE uris SET error=?,stale_count=?,check_time=? where url=?', (error, stale_count, int(time.time()), url))
+	urldb.commit()
+
+	if not len(new): return
+
+	insert_new_proxies(proxydb, new, url)
+
+_known_proxies = {}
+def extract_proxies(content):
 	matches = re.findall(r'([0-9]+(?:\.[0-9]+){3}:[0-9]{2,5})[\D$]', cleanhtml(content))
 
 	uniques_dict = {}
@@ -173,31 +205,15 @@ def proxyleech(proxydb, urldb, url, stale_count, error):
 			new.append(p)
 			_known_proxies[p] = True
 
-	if stale_count == 0 and error == 0: # new site
-		if content != '' and len(uniques) == 0: # site works but has zero proxy addresses
-			error = 99999
-	else:
-		if len(new) == 0: stale_count += 1
-		if content == '':
-			error += 1
-		else:
-			error = 0
-
-	urldb.execute('UPDATE uris SET error=?,stale_count=?,check_time=? where url=?', (error, stale_count, int(time.time()), url))
-	urldb.commit()
-
-	if not len(new): return
-
-	add = []
-	time_now = int(time.time())
-	for i in new:
-		add.append(i)
-		if len(add) >= 500:
-			insert_proxies(add, proxydb, time_now)
-			add = []
-	if len(add): insert_proxies(add, proxydb, time_now)
-	_log('+%d item(s) from %s' % (len(new), url), 'added')
-
+	return len(uniques), new
+
+def import_proxies_from_file(proxydb, fn):
+	content = open(fn, 'r').read()
+	unique_count, new = extract_proxies(content)
+	if len(new):
+		insert_new_proxies(proxydb, new, fn)
+		return 0
+	return 1
 
 if __name__ == '__main__':
 	config.load()
@@ -209,7 +225,8 @@ if __name__ == '__main__':
 	urldb = mysqlite.mysqlite(config.ppf.database, str)
 	dbs.create_table_if_not_exists(urldb, 'uris')
 	import_from_file('import.txt', urldb)
-
+	if len(sys.argv) == 3 and sys.argv[1] == "--file":
+		sys.exit(import_proxies_from_file(proxydb, sys.argv[2]))
 	if config.ppf.search:
 		## load search terms
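
A minimal usage sketch of the new one-shot import mode, assuming the patch is applied; the file name proxies.txt and its contents are hypothetical, with one ip:port entry per line. Per import_proxies_from_file, the process exits with status 0 when at least one previously unknown proxy was inserted, and 1 otherwise:

    $ printf '1.2.3.4:8080\n5.6.7.8:3128\n' > proxies.txt   # hypothetical sample input
    $ ./ppf.py --file proxies.txt
    $ echo $?   # 0 if new proxies were added, 1 if none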