diff --git a/ppf.py b/ppf.py index 65ada92..cbe6c5c 100755 --- a/ppf.py +++ b/ppf.py @@ -41,43 +41,6 @@ def is_good_content_type(string): if ct.lower() in string.lower(): return True return False -def proxyleech(proxydb, urldb, url, stale_count, error, retrievals, proxies_added, content_type, proxy): - if not content_type: content_type = get_content_type(url, proxy=proxy) - - if is_good_content_type(content_type, proxy=proxy): - try: content = fetch.fetch_contents(url, proxy=proxy) - except KeyboardInterrupt as e: raise e - except: raise - #except: content = '' - else: - content = '' - - unique_count, new = fetch.extract_proxies(content, proxydb) - - if retrievals == 0: # new site - if content != '' and unique_count == 0: # site works but has zero proxy addresses - error = 99999 - else: - if len(new) == 0: - stale_count += 1 - else: - stale_count = 0 - if content == '': - error += 1 - else: - retrievals += 1 - error = 0 - if unique_count: - extract_urls(content, url) - - urldb.execute('UPDATE uris SET error=?,stale_count=?,check_time=?,retrievals=?,proxies_added=?,content_type=? where url=?', (error, stale_count, int(time.time()), retrievals, proxies_added+len(new), content_type, url)) - urldb.commit() - - if not len(new): return - - dbs.insert_proxies(proxydb, new, url) - - def is_bad_url(uri, domain=None, samedomain=False): # if uri needs to be from same domain and domains missmatch if samedomain and str(uri.split('/')[2]).lower() != str(domain).lower(): @@ -118,103 +81,16 @@ def import_proxies_from_file(proxydb, fn): return 0 return 1 -def serve_loop(hs, done): - client_threads = [] - while not done.is_set(): - c = hs.wait_client() - - evt_done = threading.Event() - cthread = threading.Thread(target=httpsrv_client_thread, args=(c,evt_done)) - cthread.daemon = True - cthread.start() - - ctrm = [] - for ct, ct_done in client_threads: - if ct_done.is_set(): - ctrm.append((ct,ct_done)) - ct.join() - - if len(ctrm): - client_threads = [ x for x in client_threads if not x in ctrm ] - - client_threads.append((cthread, evt_done)) - -def forbidden_page(): - return ( - '\n' - ' \n' - ' \n' - ' Forbidden\n' - ' \n' - ' \n' - '
🖕
\n' - ' \n' - '') - -def httpsrv_client_thread(c, evt_done): - req = c.read_request() - if req is None: pass - elif len(watchlist) == 0: - c.redirect('/config.html') - elif os.path.isdir(req['url'][1:]): - c.send(403,'Forbidden', forbidden_page()) - elif req['url'] == '/': - c.redirect('/index.html') - elif req['url'].startswith('/index.html'): - variables = variables_from_request(req) - r, redir = render_site(variables) - if redir is not "": - c.redirect(redir) - else: - if r == '': r = render_empty(variables=variables) - c.send(200, "OK", r) - elif not '..' in req['url'] and file_exists(os.getcwd() + req['url']): - c.serve_file(os.getcwd() + req['url']) - elif req['url'] == '/robots.txt': - c.send(200, "OK", "User-agent: *\nDisallow: /") - - elif req['url'].startswith('/config.html'): - if args.config > 0: - variables=variables_from_request(req) - r, redir = configpage(req,variables) - else: - redir = '/index.html' - if redir is not "": - c.redirect(redir) - else: - if r == '': r = render_empty(variables=variables) - c.send(200, "OK", r) - - else: - c.send(404, "not exist", "the reqested file not exist!!!1") - c.disconnect() - evt_done.set() - -def start_server(ip, port): - done = threading.Event() - from httpsrv import HttpSrv - hs = HttpSrv(ip, port) - try: - hs.setup() - except socket.error as e: - if e.errno == errno.EADDRINUSE: - sys.stderr.write(( - "ERROR: server socket address in use\n" - "wait a couple seconds and try again.\n" - "in case you're in pdb, you need to quit it\n")) - sys.exit(1) - else: - raise e - - t = threading.Thread(target=serve_loop, args=(hs, done)) - t.daemon = True - t.start() - return t, done - def extract_proxies(content): + """Extract and normalize proxy addresses from content.""" matches = re.findall(r'([0-9]+(?:\.[0-9]+){3}:[0-9]{2,5})[\D$]', fetch.cleanhtml(content)) uniques_dict = {} for p in matches: + # Cleanse IP (remove leading zeros) and port + ip, port = p.split(':') + ip = '.'.join(str(int(octet)) for octet in ip.split('.')) + port = int(port.lstrip('0') or '0') + p = '%s:%s' % (ip, port) uniques_dict[p] = True uniques = [] @@ -225,7 +101,6 @@ def extract_proxies(content): class Leechered(threading.Thread): - #def __init__(self, proxydb, urldb, url, stale_count, error, retrievals, proxies_added, content_type): def __init__(self, url, stale_count, error, retrievals, proxies_added, content_type, proxy): self.status = 'nok' self.proxylist = [] @@ -324,14 +199,9 @@ if __name__ == '__main__': else: watcherd = None - #start_server(config.httpd.listenip, config.httpd.port) - - #qurl = 'SELECT url,stale_count,error,retrievals,proxies_added,content_type FROM uris WHERE error < ? and (check_time+?+((error+stale_count)*?)