tabs to space

2021-02-06 12:14:22 +01:00
parent 9aa2c91f41
commit 01bded472f
1 changed files with 175 additions and 175 deletions
@@ -15,231 +15,231 @@ import threading
 config = Config()
 def import_from_file(fn, sqlite):
-	with open(fn, 'r') as f:
+  with open(fn, 'r') as f:
-		urls = [ url for url in f.read().split('\n') if url != '' ]
+    urls = [ url for url in f.read().split('\n') if url != '' ]
-		dbs.insert_urls(urls, 'import.txt', urldb)
+    dbs.insert_urls(urls, 'import.txt', urldb)
 def get_content_type(url):
-	hdr = fetch.fetch_contents(url, head=True)
+  hdr = fetch.fetch_contents(url, head=True)
-	for h in hdr.split('\n'):
+  for h in hdr.split('\n'):
-		if h.lower().startswith('content-type: '): return h.lower().split(':')[1].strip()
+    if h.lower().startswith('content-type: '): return h.lower().split(':')[1].strip()
-	return ''
+  return ''
 def is_good_content_type(string):
-	allowed_ct = [ 'text/html', 'text/plain', 'atom+xml' ]
+  allowed_ct = [ 'text/html', 'text/plain', 'atom+xml' ]
-	for ct in allowed_ct:
+  for ct in allowed_ct:
-		if ct.lower() in string.lower(): return True
+    if ct.lower() in string.lower(): return True
-	return False
+  return False
 def proxyleech(proxydb, urldb, url, stale_count, error, retrievals, proxies_added, content_type):
-	if not content_type: content_type = get_content_type(url)
+  if not content_type: content_type = get_content_type(url)
-	if is_good_content_type(content_type):
+  if is_good_content_type(content_type):
-		try: content = fetch.fetch_contents(url)
+    try: content = fetch.fetch_contents(url)
-		except KeyboardInterrupt as e: raise e
+    except KeyboardInterrupt as e: raise e
-		except: content = ''
+    except: content = ''
-	else:
+  else:
-		content = ''
+    content = ''
-	unique_count, new = fetch.extract_proxies(content, proxydb)
+  unique_count, new = fetch.extract_proxies(content, proxydb)
-	if retrievals == 0:  # new site
+  if retrievals == 0:  # new site
-		if content != '' and unique_count == 0: # site works but has zero proxy addresses
+    if content != '' and unique_count == 0: # site works but has zero proxy addresses
-			error = 99999
+      error = 99999
-	else:
+  else:
-		if len(new) == 0:
+    if len(new) == 0:
-			stale_count += 1
+      stale_count += 1
-		else:
+    else:
-			extract_urls(content, url)
+      extract_urls(content, url)
-			stale_count = 0
+      stale_count = 0
-		if content == '':
+    if content == '':
-			error += 1
+      error += 1
-		else:
+    else:
-			retrievals += 1
+      retrievals += 1
-			error = 0
+      error = 0
-	urldb.execute('UPDATE uris SET error=?,stale_count=?,check_time=?,retrievals=?,proxies_added=?,content_type=? where url=?', (error, stale_count, int(time.time()), retrievals, proxies_added+len(new), content_type, url))
+  urldb.execute('UPDATE uris SET error=?,stale_count=?,check_time=?,retrievals=?,proxies_added=?,content_type=? where url=?', (error, stale_count, int(time.time()), retrievals, proxies_added+len(new), content_type, url))
-	urldb.commit()
+  urldb.commit()
-	if not len(new): return
+  if not len(new): return
-	dbs.insert_proxies(proxydb, new, url)
+  dbs.insert_proxies(proxydb, new, url)
 def is_bad_url(uri, domain=None, samedomain=False):
-	# if uri needs to be from same domain and domains missmatch
+  # if uri needs to be from same domain and domains missmatch
-	if samedomain and str(uri.split('/')[2]).lower() != str(domain).lower():
+  if samedomain and str(uri.split('/')[2]).lower() != str(domain).lower():
-		return True
+    return True
-	for u in urignore:
+  for u in urignore:
-		if re.findall(u, uri): return True
+    if re.findall(u, uri): return True
-	return False
+  return False
 def extract_urls(html, url):
-	mytime = int(time.time())
+  mytime = int(time.time())
-	proto = url.split(':')[0]
+  proto = url.split(':')[0]
-	domain = url.split('/')[2]
+  domain = url.split('/')[2]
-	urls = []
+  urls = []
-	soup = BeautifulSoup(html, features='lxml')
+  soup = BeautifulSoup(html, features='lxml')
-	for a in soup.find_all('a', href=True):
+  for a in soup.find_all('a', href=True):
-		item = a['href'].encode('utf-8') if isinstance(a['href'], unicode) else a['href']
+    item = a['href'].encode('utf-8') if isinstance(a['href'], unicode) else a['href']
-		item = item.strip()
+    item = item.strip()
-		if item.startswith('www.'):
+    if item.startswith('www.'):
-			item = 'http://%s' % item
+      item = 'http://%s' % item
-		elif not item.startswith('http'):
+    elif not item.startswith('http'):
-			if not item.startswith('/'): item = '/%s' % item
+      if not item.startswith('/'): item = '/%s' % item
-			item = '%s://%s%s' % (proto,domain,item)
+      item = '%s://%s%s' % (proto,domain,item)
-		elif is_bad_url(item, domain=domain, samedomain=config.ppf.extract_samedomain):
+    elif is_bad_url(item, domain=domain, samedomain=config.ppf.extract_samedomain):
-			continue
+      continue
-		if not item in urls: urls.append(item)
+    if not item in urls: urls.append(item)
-	if len(urls): dbs.insert_urls(urls, url, urldb) #insert_if_not_exists(urls)
+  if len(urls): dbs.insert_urls(urls, url, urldb) #insert_if_not_exists(urls)
 def import_proxies_from_file(proxydb, fn):
-	content = open(fn, 'r').read()
+  content = open(fn, 'r').read()
-	unique_count, new = fetch.extract_proxies(content, proxydb)
+  unique_count, new = fetch.extract_proxies(content, proxydb)
-	if len(new):
+  if len(new):
-		dbs.insert_proxies(proxydb, new, fn)
+    dbs.insert_proxies(proxydb, new, fn)
-		return 0
+    return 0
-	return 1
+  return 1
 def serve_loop(hs, done):
-        client_threads = []
+  client_threads = []
-        while not done.is_set():
+  while not done.is_set():
-                c = hs.wait_client()
+    c = hs.wait_client()
-                evt_done = threading.Event()
+    evt_done = threading.Event()
-                cthread = threading.Thread(target=httpsrv_client_thread, args=(c,evt_done))
+    cthread = threading.Thread(target=httpsrv_client_thread, args=(c,evt_done))
-                cthread.daemon = True
+    cthread.daemon = True
-                cthread.start()
+    cthread.start()
-                ctrm = []
+    ctrm = []
-                for ct, ct_done in client_threads:
+    for ct, ct_done in client_threads:
-                        if ct_done.is_set():
+      if ct_done.is_set():
-                                ctrm.append((ct,ct_done))
+        ctrm.append((ct,ct_done))
-                                ct.join()
+        ct.join()
-                if len(ctrm):
+    if len(ctrm):
-                        client_threads = [ x for x in client_threads if not x in ctrm ]
+      client_threads = [ x for x in client_threads if not x in ctrm ]
-                client_threads.append((cthread, evt_done))
+    client_threads.append((cthread, evt_done))
 def forbidden_page():
-        return (
+  return (
-                '<!DOCTYPE html>\n'
+    '<!DOCTYPE html>\n'
-                '  <head>\n'
+    '  <head>\n'
-                '    <style>div.e{position:fixed;top:25%;bottom:25%;left:25%;right:25%;font-size:150px;text-align:center;}</style>\n'
+    '    <style>div.e{position:fixed;top:25%;bottom:25%;left:25%;right:25%;font-size:150px;text-align:center;}</style>\n'
-                '    <title>Forbidden</title>\n'
+    '    <title>Forbidden</title>\n'
-                '  </head>\n'
+    '  </head>\n'
-                '  <body>\n'
+    '  <body>\n'
-                '    <div class="e">&#128405;</div>\n'
+    '    <div class="e">&#128405;</div>\n'
-                '  </body>\n'
+    '  </body>\n'
-                '</html>')
+    '</html>')
 def httpsrv_client_thread(c, evt_done):
-        req = c.read_request()
+  req = c.read_request()
-        if req is None: pass
+  if req is None: pass
-        elif len(watchlist) == 0:
+  elif len(watchlist) == 0:
-                c.redirect('/config.html')
+    c.redirect('/config.html')
-        elif os.path.isdir(req['url'][1:]):
+  elif os.path.isdir(req['url'][1:]):
-                c.send(403,'Forbidden', forbidden_page())
+    c.send(403,'Forbidden', forbidden_page())
-        elif req['url'] == '/':
+  elif req['url'] == '/':
-                c.redirect('/index.html')
+    c.redirect('/index.html')
-        elif req['url'].startswith('/index.html'):
+  elif req['url'].startswith('/index.html'):
-                variables = variables_from_request(req)
+    variables = variables_from_request(req)
-                r, redir = render_site(variables)
+    r, redir = render_site(variables)
-                if redir is not "":
+    if redir is not "":
-                        c.redirect(redir)
+      c.redirect(redir)
-                else:
+    else:
-                        if r == '': r = render_empty(variables=variables)
+      if r == '': r = render_empty(variables=variables)
-                        c.send(200, "OK", r)
+      c.send(200, "OK", r)
-        elif not '..' in req['url'] and file_exists(os.getcwd() + req['url']):
+  elif not '..' in req['url'] and file_exists(os.getcwd() + req['url']):
-                c.serve_file(os.getcwd() + req['url'])
+    c.serve_file(os.getcwd() + req['url'])
-        elif req['url'] == '/robots.txt':
+  elif req['url'] == '/robots.txt':
-                c.send(200, "OK", "User-agent: *\nDisallow: /")
+    c.send(200, "OK", "User-agent: *\nDisallow: /")
-        elif req['url'].startswith('/config.html'):
+  elif req['url'].startswith('/config.html'):
-                if args.config > 0:
+    if args.config > 0:
-                        variables=variables_from_request(req)
+      variables=variables_from_request(req)
-                        r, redir = configpage(req,variables)
+      r, redir = configpage(req,variables)
-                else:
+    else:
-                        redir = '/index.html'
+      redir = '/index.html'
-                if redir is not "":
+    if redir is not "":
-                        c.redirect(redir)
+      c.redirect(redir)
-                else:
+    else:
-                        if r == '': r = render_empty(variables=variables)
+      if r == '': r = render_empty(variables=variables)
-                        c.send(200, "OK", r)
+      c.send(200, "OK", r)
-        else:
+  else:
-                c.send(404, "not exist", "the reqested file not exist!!!1")
+    c.send(404, "not exist", "the reqested file not exist!!!1")
-        c.disconnect()
+  c.disconnect()
-        evt_done.set()
+  evt_done.set()
 def start_server(ip, port):
-        done = threading.Event()
+  done = threading.Event()
-        from httpsrv import HttpSrv
+  from httpsrv import HttpSrv
-        hs = HttpSrv(ip, port)
+  hs = HttpSrv(ip, port)
-        try:
+  try:
-                hs.setup()
+    hs.setup()
-        except socket.error as e:
+  except socket.error as e:
-                if e.errno == errno.EADDRINUSE:
+    if e.errno == errno.EADDRINUSE:
-                        sys.stderr.write((
+      sys.stderr.write((
-                                "ERROR: server socket address in use\n"
+        "ERROR: server socket address in use\n"
-                                "wait a couple seconds and try again.\n"
+        "wait a couple seconds and try again.\n"
-                                "in case you're in pdb, you need to quit it\n"))
+        "in case you're in pdb, you need to quit it\n"))
-                        sys.exit(1)
+      sys.exit(1)
-                else:
+    else:
-                        raise e
+      raise e
-        t = threading.Thread(target=serve_loop, args=(hs, done))
+  t = threading.Thread(target=serve_loop, args=(hs, done))
-        t.daemon = True
+  t.daemon = True
-        t.start()
+  t.start()
-        return t, done
+  return t, done
 if __name__ == '__main__':
-	config.load()
+  config.load()
-	fetch.set_config(config)
+  fetch.set_config(config)
-	proxydb = mysqlite.mysqlite(config.watchd.database, str)
+  proxydb = mysqlite.mysqlite(config.watchd.database, str)
-	dbs.create_table_if_not_exists(proxydb, 'proxylist')
+  dbs.create_table_if_not_exists(proxydb, 'proxylist')
-	with open('urignore.txt', 'r') as f:
+  with open('urignore.txt', 'r') as f:
-		urignore = [ i.strip() for i in f.read().split('\n') if len(i.strip()) ]
+    urignore = [ i.strip() for i in f.read().split('\n') if len(i.strip()) ]
-	urldb = mysqlite.mysqlite(config.ppf.database, str)
+  urldb = mysqlite.mysqlite(config.ppf.database, str)
-	dbs.create_table_if_not_exists(urldb, 'uris')
+  dbs.create_table_if_not_exists(urldb, 'uris')
-	import_from_file('import.txt', urldb)
+  import_from_file('import.txt', urldb)
-	if len(sys.argv) == 3 and sys.argv[1] == "--file":
+  if len(sys.argv) == 3 and sys.argv[1] == "--file":
-		sys.exit(import_proxies_from_file(proxydb, sys.argv[2]))
+    sys.exit(import_proxies_from_file(proxydb, sys.argv[2]))
-	# start proxy watcher
+  # start proxy watcher
-	if config.watchd.threads > 0:
+  if config.watchd.threads > 0:
-		watcherd = proxywatchd.Proxywatchd()
+    watcherd = proxywatchd.Proxywatchd()
-		watcherd.start()
+    watcherd.start()
-	else:
+  else:
-		watcherd = None
+    watcherd = None
-	start_server(config.httpd.listenip, config.httpd.port)
+  start_server(config.httpd.listenip, config.httpd.port)
-	while True:
+  while True:
-		try:
+    try:
-			## any site that needs to be checked ?
+      ## any site that needs to be checked ?
-			rows = urldb.execute('SELECT url,stale_count,error,retrievals,proxies_added,content_type FROM uris WHERE error < ? and (check_time+?+((error+stale_count)*?) <?) ORDER BY RANDOM() LIMIT 25', (config.ppf.max_fail, config.ppf.checktime, config.ppf.perfail_checktime, int(time.time()))).fetchall()
+      rows = urldb.execute('SELECT url,stale_count,error,retrievals,proxies_added,content_type FROM uris WHERE error < ? and (check_time+?+((error+stale_count)*?) <?) ORDER BY RANDOM() LIMIT 25', (config.ppf.max_fail, config.ppf.checktime, config.ppf.perfail_checktime, int(time.time()))).fetchall()
-			if not len(rows): time.sleep(10)
+      if not len(rows): time.sleep(10)
-			for row in rows:
+      for row in rows:
-				proxyleech(proxydb, urldb, row[0], row[1], row[2], row[3], row[4], row[5])
+        proxyleech(proxydb, urldb, row[0], row[1], row[2], row[3], row[4], row[5])
-		except KeyboardInterrupt:
+    except KeyboardInterrupt:
-			if watcherd:
+      if watcherd:
-				watcherd.stop()
+        watcherd.stop()
-				watcherd.finish()
+        watcherd.finish()
-			break
+      break
-	print '\r',
+  print '\r',