From 747e6dd7aa40c0089b9e246a22557e9b402dd57f Mon Sep 17 00:00:00 2001 From: Username Date: Sun, 21 Dec 2025 23:37:57 +0100 Subject: [PATCH] ppf: improve exception handling and logging --- ppf.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 15 deletions(-) diff --git a/ppf.py b/ppf.py index adf69d8..74e20fc 100644 --- a/ppf.py +++ b/ppf.py @@ -1,5 +1,8 @@ #!/usr/bin/env python2 +import cProfile +import pstats +import signal import dbs import time import mysqlite @@ -13,6 +16,12 @@ import re import threading import random +# Handle SIGTERM gracefully (for container stop) +def sigterm_handler(signum, frame): + raise KeyboardInterrupt + +signal.signal(signal.SIGTERM, sigterm_handler) + config = Config() def import_from_file(fn, sqlite): @@ -111,7 +120,13 @@ class Leechered(threading.Thread): except KeyboardInterrupt as e: raise e except Exception as e: - _log('%s: fetch error: %s' % (self.url.split('/')[2], str(e)), 'error') + try: + err_msg = repr(e) + if isinstance(err_msg, unicode): + err_msg = err_msg.encode('ascii', 'backslashreplace') + except: + err_msg = type(e).__name__ + _log('%s: fetch error: %s' % (self.url.split('/')[2], err_msg), 'error') content = '' else: content = '' @@ -156,20 +171,9 @@ class Leechered(threading.Thread): self.status = 'ok' -if __name__ == '__main__': - config.load() - errors = config.validate() - if errors: - for e in errors: - _log(e, 'error') - sys.exit(1) - fetch.set_config(config) - - # handle --nobs flag - args = config.aparser.parse_args() - if args.nobs: - set_nobs(True) - +def main(): + """Main entry point.""" + global config proxydb = mysqlite.mysqlite(config.watchd.database, str) dbs.create_table_if_not_exists(proxydb, 'proxylist') @@ -180,6 +184,7 @@ if __name__ == '__main__': urldb = mysqlite.mysqlite(config.ppf.database, str) dbs.create_table_if_not_exists(urldb, 'uris') + dbs.seed_proxy_sources(urldb) import_from_file('import.txt', urldb) if len(sys.argv) == 3 and sys.argv[1] == "--file": sys.exit(import_proxies_from_file(proxydb, sys.argv[2])) @@ -191,6 +196,13 @@ if __name__ == '__main__': else: watcherd = None + # start scraper if enabled + scraperd = None + if config.scraper.enabled: + import scraper + scraperd = scraper.Scraper(config) + scraperd.start() + qurl = 'SELECT url,stale_count,error,retrievals,proxies_added,content_type FROM uris WHERE error < ? and (check_time+?+((error+stale_count)*?)