ppf: improve exception handling and logging

This commit is contained in:
Username
2025-12-21 23:37:57 +01:00
parent 901f2c1aee
commit 747e6dd7aa

74
ppf.py
View File

@@ -1,5 +1,8 @@
#!/usr/bin/env python2
import cProfile
import pstats
import signal
import dbs
import time
import mysqlite
@@ -13,6 +16,12 @@ import re
import threading
import random
# Handle SIGTERM gracefully (for container stop)
def sigterm_handler(signum, frame):
    """Signal handler that converts the delivered signal into KeyboardInterrupt.

    Raising KeyboardInterrupt lets a termination signal be handled by the
    same ``except KeyboardInterrupt`` code that handles an interactive
    interrupt.

    signum -- number of the signal being handled (unused)
    frame  -- stack frame that was interrupted (unused)
    """
    raise KeyboardInterrupt
signal.signal(signal.SIGTERM, sigterm_handler)
config = Config()
def import_from_file(fn, sqlite):
@@ -111,7 +120,13 @@ class Leechered(threading.Thread):
except KeyboardInterrupt as e:
raise e
except Exception as e:
_log('%s: fetch error: %s' % (self.url.split('/')[2], str(e)), 'error')
try:
err_msg = repr(e)
if isinstance(err_msg, unicode):
err_msg = err_msg.encode('ascii', 'backslashreplace')
except:
err_msg = type(e).__name__
_log('%s: fetch error: %s' % (self.url.split('/')[2], err_msg), 'error')
content = ''
else:
content = ''
@@ -156,20 +171,9 @@ class Leechered(threading.Thread):
self.status = 'ok'
if __name__ == '__main__':
config.load()
errors = config.validate()
if errors:
for e in errors:
_log(e, 'error')
sys.exit(1)
fetch.set_config(config)
# handle --nobs flag
args = config.aparser.parse_args()
if args.nobs:
set_nobs(True)
def main():
"""Main entry point."""
global config
proxydb = mysqlite.mysqlite(config.watchd.database, str)
dbs.create_table_if_not_exists(proxydb, 'proxylist')
@@ -180,6 +184,7 @@ if __name__ == '__main__':
urldb = mysqlite.mysqlite(config.ppf.database, str)
dbs.create_table_if_not_exists(urldb, 'uris')
dbs.seed_proxy_sources(urldb)
import_from_file('import.txt', urldb)
if len(sys.argv) == 3 and sys.argv[1] == "--file":
sys.exit(import_proxies_from_file(proxydb, sys.argv[2]))
@@ -191,6 +196,13 @@ if __name__ == '__main__':
else:
watcherd = None
# start scraper if enabled
scraperd = None
if config.scraper.enabled:
import scraper
scraperd = scraper.Scraper(config)
scraperd.start()
qurl = 'SELECT url,stale_count,error,retrievals,proxies_added,content_type FROM uris WHERE error < ? and (check_time+?+((error+stale_count)*?) <?) ORDER BY RANDOM()'
threads = []
rows = []
@@ -238,9 +250,41 @@ if __name__ == '__main__':
t.start()
except KeyboardInterrupt:
if scraperd:
scraperd.stop()
if watcherd:
watcherd.stop()
watcherd.finish()
break
_log('ppf stopped', 'info')
if __name__ == '__main__':
    config.load()

    # Refuse to start on an invalid configuration: report every problem
    # first, then exit with a non-zero status.
    errors = config.validate()
    if errors:
        for e in errors:
            _log(e, 'error')
        sys.exit(1)

    fetch.set_config(config)

    # handle flags
    if config.args.nobs:
        set_nobs(True)

    if config.args.profile:
        # Single definition of the output file used for logging, dumping
        # and re-reading the profile data.
        profile_path = 'profile.stats'
        _log('profiling enabled, output to %s' % profile_path, 'info')
        profiler = cProfile.Profile()
        try:
            profiler.enable()
            main()
        finally:
            # Runs even when main() raises, so the data collected up to
            # that point is still written out and summarised.
            profiler.disable()
            profiler.dump_stats(profile_path)
            _log('profile stats written to %s' % profile_path, 'info')
            # print top 20 by cumulative time
            stats = pstats.Stats(profile_path)
            stats.strip_dirs().sort_stats('cumulative').print_stats(20)
    else:
        main()