feat: worker-driven discovery and validation tightening #1
65
ppf.py
65
ppf.py
@@ -369,6 +369,71 @@ def worker_send_heartbeat(server_url, worker_key, tor_ok, tor_ip=None, profiling
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def worker_claim_urls(server_url, worker_key, count=5):
|
||||||
|
"""Claim batch of URLs for V2 worker mode."""
|
||||||
|
url = '%s/api/claim-urls?key=%s&count=%d' % (server_url.rstrip('/'), worker_key, count)
|
||||||
|
|
||||||
|
try:
|
||||||
|
resp = urllib2.urlopen(url, timeout=30)
|
||||||
|
result = json.loads(resp.read())
|
||||||
|
return result.get('urls', [])
|
||||||
|
except urllib2.HTTPError as e:
|
||||||
|
if e.code == 403:
|
||||||
|
_log('worker key rejected (403), need to re-register', 'warn')
|
||||||
|
raise NeedReregister()
|
||||||
|
_log('failed to claim urls: %s' % e, 'error')
|
||||||
|
return []
|
||||||
|
except Exception as e:
|
||||||
|
_log('failed to claim urls: %s' % e, 'error')
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def worker_report_urls(server_url, worker_key, reports):
|
||||||
|
"""Report URL fetch results to master."""
|
||||||
|
url = '%s/api/report-urls?key=%s' % (server_url.rstrip('/'), worker_key)
|
||||||
|
data = json.dumps({'reports': reports})
|
||||||
|
|
||||||
|
req = urllib2.Request(url, data)
|
||||||
|
req.add_header('Content-Type', 'application/json')
|
||||||
|
|
||||||
|
try:
|
||||||
|
resp = urllib2.urlopen(req, timeout=30)
|
||||||
|
result = json.loads(resp.read())
|
||||||
|
return result.get('processed', 0)
|
||||||
|
except urllib2.HTTPError as e:
|
||||||
|
if e.code == 403:
|
||||||
|
_log('worker key rejected (403), need to re-register', 'warn')
|
||||||
|
raise NeedReregister()
|
||||||
|
_log('failed to report urls: %s' % e, 'error')
|
||||||
|
return 0
|
||||||
|
except Exception as e:
|
||||||
|
_log('failed to report urls: %s' % e, 'error')
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def worker_report_proxies(server_url, worker_key, proxies):
|
||||||
|
"""Report working proxies to master."""
|
||||||
|
url = '%s/api/report-proxies?key=%s' % (server_url.rstrip('/'), worker_key)
|
||||||
|
data = json.dumps({'proxies': proxies})
|
||||||
|
|
||||||
|
req = urllib2.Request(url, data)
|
||||||
|
req.add_header('Content-Type', 'application/json')
|
||||||
|
|
||||||
|
try:
|
||||||
|
resp = urllib2.urlopen(req, timeout=30)
|
||||||
|
result = json.loads(resp.read())
|
||||||
|
return result.get('processed', 0)
|
||||||
|
except urllib2.HTTPError as e:
|
||||||
|
if e.code == 403:
|
||||||
|
_log('worker key rejected (403), need to re-register', 'warn')
|
||||||
|
raise NeedReregister()
|
||||||
|
_log('failed to report proxies: %s' % e, 'error')
|
||||||
|
return 0
|
||||||
|
except Exception as e:
|
||||||
|
_log('failed to report proxies: %s' % e, 'error')
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
def check_tor_connectivity(tor_hosts):
|
def check_tor_connectivity(tor_hosts):
|
||||||
"""Test Tor connectivity. Returns (working_hosts, tor_ip)."""
|
"""Test Tor connectivity. Returns (working_hosts, tor_ip)."""
|
||||||
import socket
|
import socket
|
||||||
|
|||||||
Reference in New Issue
Block a user