ppf: improve exception handling and logging
This commit is contained in:
74
ppf.py
74
ppf.py
@@ -1,5 +1,8 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
|
|
||||||
|
import cProfile
|
||||||
|
import pstats
|
||||||
|
import signal
|
||||||
import dbs
|
import dbs
|
||||||
import time
|
import time
|
||||||
import mysqlite
|
import mysqlite
|
||||||
@@ -13,6 +16,12 @@ import re
|
|||||||
import threading
|
import threading
|
||||||
import random
|
import random
|
||||||
|
|
||||||
|
# Handle SIGTERM gracefully (for container stop)
|
||||||
|
def sigterm_handler(signum, frame):
    """Turn a delivered signal into a ``KeyboardInterrupt``.

    Registered for SIGTERM so that a termination request unwinds through
    the same ``except KeyboardInterrupt`` shutdown path as Ctrl-C.

    Both arguments (signal number and interrupted stack frame) are
    ignored; they are part of the handler signature the ``signal``
    module requires.
    """
    raise KeyboardInterrupt()
|
||||||
|
|
||||||
|
signal.signal(signal.SIGTERM, sigterm_handler)
|
||||||
|
|
||||||
config = Config()
|
config = Config()
|
||||||
|
|
||||||
def import_from_file(fn, sqlite):
|
def import_from_file(fn, sqlite):
|
||||||
@@ -111,7 +120,13 @@ class Leechered(threading.Thread):
|
|||||||
except KeyboardInterrupt as e:
|
except KeyboardInterrupt as e:
|
||||||
raise e
|
raise e
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
_log('%s: fetch error: %s' % (self.url.split('/')[2], str(e)), 'error')
|
# Build a log-safe description of the fetch failure. repr() is used
# instead of str() so the exception type is visible and non-ASCII
# messages cannot break the '%' formatting below.
try:
    err_msg = repr(e)
    if isinstance(err_msg, unicode):
        err_msg = err_msg.encode('ascii', 'backslashreplace')
except Exception:
    # Only ordinary errors from a misbehaving __repr__/encode are
    # absorbed here. A bare ``except:`` would also swallow
    # KeyboardInterrupt (which the SIGTERM handler raises) and
    # SystemExit, silently cancelling a shutdown request.
    err_msg = type(e).__name__
_log('%s: fetch error: %s' % (self.url.split('/')[2], err_msg), 'error')
|
||||||
content = ''
|
content = ''
|
||||||
else:
|
else:
|
||||||
content = ''
|
content = ''
|
||||||
@@ -156,20 +171,9 @@ class Leechered(threading.Thread):
|
|||||||
self.status = 'ok'
|
self.status = 'ok'
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
def main():
|
||||||
config.load()
|
"""Main entry point."""
|
||||||
errors = config.validate()
|
global config
|
||||||
if errors:
|
|
||||||
for e in errors:
|
|
||||||
_log(e, 'error')
|
|
||||||
sys.exit(1)
|
|
||||||
fetch.set_config(config)
|
|
||||||
|
|
||||||
# handle --nobs flag
|
|
||||||
args = config.aparser.parse_args()
|
|
||||||
if args.nobs:
|
|
||||||
set_nobs(True)
|
|
||||||
|
|
||||||
|
|
||||||
proxydb = mysqlite.mysqlite(config.watchd.database, str)
|
proxydb = mysqlite.mysqlite(config.watchd.database, str)
|
||||||
dbs.create_table_if_not_exists(proxydb, 'proxylist')
|
dbs.create_table_if_not_exists(proxydb, 'proxylist')
|
||||||
@@ -180,6 +184,7 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
urldb = mysqlite.mysqlite(config.ppf.database, str)
|
urldb = mysqlite.mysqlite(config.ppf.database, str)
|
||||||
dbs.create_table_if_not_exists(urldb, 'uris')
|
dbs.create_table_if_not_exists(urldb, 'uris')
|
||||||
|
dbs.seed_proxy_sources(urldb)
|
||||||
import_from_file('import.txt', urldb)
|
import_from_file('import.txt', urldb)
|
||||||
if len(sys.argv) == 3 and sys.argv[1] == "--file":
|
if len(sys.argv) == 3 and sys.argv[1] == "--file":
|
||||||
sys.exit(import_proxies_from_file(proxydb, sys.argv[2]))
|
sys.exit(import_proxies_from_file(proxydb, sys.argv[2]))
|
||||||
@@ -191,6 +196,13 @@ if __name__ == '__main__':
|
|||||||
else:
|
else:
|
||||||
watcherd = None
|
watcherd = None
|
||||||
|
|
||||||
|
# start scraper if enabled
|
||||||
|
scraperd = None
|
||||||
|
if config.scraper.enabled:
|
||||||
|
import scraper
|
||||||
|
scraperd = scraper.Scraper(config)
|
||||||
|
scraperd.start()
|
||||||
|
|
||||||
qurl = 'SELECT url,stale_count,error,retrievals,proxies_added,content_type FROM uris WHERE error < ? and (check_time+?+((error+stale_count)*?) <?) ORDER BY RANDOM()'
|
qurl = 'SELECT url,stale_count,error,retrievals,proxies_added,content_type FROM uris WHERE error < ? and (check_time+?+((error+stale_count)*?) <?) ORDER BY RANDOM()'
|
||||||
threads = []
|
threads = []
|
||||||
rows = []
|
rows = []
|
||||||
@@ -238,9 +250,41 @@ if __name__ == '__main__':
|
|||||||
t.start()
|
t.start()
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
|
if scraperd:
|
||||||
|
scraperd.stop()
|
||||||
if watcherd:
|
if watcherd:
|
||||||
watcherd.stop()
|
watcherd.stop()
|
||||||
watcherd.finish()
|
watcherd.finish()
|
||||||
break
|
break
|
||||||
|
|
||||||
_log('ppf stopped', 'info')
|
_log('ppf stopped', 'info')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Load and validate the configuration first; report every validation
    # error before exiting so they can all be fixed in one pass.
    # NOTE(review): nesting was reconstructed from a flattened diff --
    # confirm sys.exit(1) sits under ``if errors:`` and not in the loop.
    config.load()
    errors = config.validate()
    if errors:
        for e in errors:
            _log(e, 'error')
        sys.exit(1)
    fetch.set_config(config)

    # handle flags
    if config.args.nobs:
        set_nobs(True)

    if not config.args.profile:
        main()
    else:
        _log('profiling enabled, output to profile.stats', 'info')
        profiler = cProfile.Profile()
        try:
            profiler.enable()
            main()
        finally:
            # Runs even if main() raises, so an interrupted or crashed
            # run still writes its profile.
            profiler.disable()
            profiler.dump_stats('profile.stats')
            _log('profile stats written to profile.stats', 'info')
            # print top 20 by cumulative time
            stats = pstats.Stats('profile.stats')
            stats.strip_dirs()
            stats.sort_stats('cumulative')
            stats.print_stats(20)
|
||||||
|
|||||||
Reference in New Issue
Block a user