cleansing
This commit is contained in:
23
config.ini
23
config.ini
@@ -1,23 +0,0 @@
|
|||||||
[global]
|
|
||||||
tor_host = 127.0.0.1:9050
|
|
||||||
i2p_host = 127.0.0.1:4444
|
|
||||||
common_proxy_ports = 80, 1080, 3124, 3128, 4145, 4444, 8080, 8081, 8118, 8888, 9999
|
|
||||||
proxylist_reload_every = 180
|
|
||||||
database = proxylist.sqlite
|
|
||||||
proxy_max_fail = 5
|
|
||||||
|
|
||||||
[watcherd]
|
|
||||||
enabled = true
|
|
||||||
proxy_file = false
|
|
||||||
checktime = 1800
|
|
||||||
threads = 10
|
|
||||||
timeout = 15
|
|
||||||
read_timeout = 20
|
|
||||||
max_fail = 5
|
|
||||||
|
|
||||||
[proxyfind]
|
|
||||||
enabled = true
|
|
||||||
search = true
|
|
||||||
maxfail = 10
|
|
||||||
timeout = 30
|
|
||||||
threads = 3
|
|
||||||
@@ -16,72 +16,3 @@ def random_string(strlen=20):
|
|||||||
def _log(strng, level='info'):
|
def _log(strng, level='info'):
|
||||||
print '%s/%s\t%s' % (timestamp(), level, strng)
|
print '%s/%s\t%s' % (timestamp(), level, strng)
|
||||||
|
|
||||||
def option_matches_options(strng, items):
    """Return the entries of ``items`` matched by the regex ``strng``.

    ``strng`` is handed to ``re.match``, so the pattern is anchored at the
    start of each item.

    Returns a list (possibly empty) of the matching items, or ``False``
    when the lookup cannot be evaluated at all (invalid pattern,
    non-string item, non-iterable ``items``, ...).
    """
    try:
        return [item for item in items if re.match(strng, item)]
    except Exception:
        # Best-effort lookup: any ordinary failure means "no usable answer".
        # Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being turned into False.
        return False
|
|
||||||
|
|
||||||
def prepare_socksocket(self, destination, path, path_item):
    """Build the rocksock proxy chain used to reach ``destination``.

    ``self`` -- object providing ``paths``, ``i2p_host``, ``tor_host`` and
                ``proxify``.
    ``destination`` -- host name being connected to.
    ``path`` -- the proxy path the caller currently holds (or a false value).
    ``path_item`` -- key under which the path is cached in ``self.paths``.

    Returns a ``(ok, proxies, path)`` tuple:
    - ``(False, False, False)`` when clearnet proxies are requested but
      ``build_path`` could not provide any;
    - otherwise ``True``, the list of rocksock proxy objects to chain, and
      the clearnet path in use (``False`` for i2p destinations).
    """
    # The caller handed back the very path cached for path_item: drop it so
    # that build_path() does not serve it again.
    # NOTE(review): presumably callers pass the path back after a failure --
    # confirm against the call sites.
    if path_item in self.paths and self.paths[path_item]['path'] == path:
        self.paths[path_item]['path'] = False

    # relay to i2p http proxy if *.i2p domain
    if destination.endswith('i2p'):
        # The previous code only picked a host here and never built
        # `proxies`, so the final return raised NameError for i2p targets.
        # NOTE(review): assumes rocksock accepts the 'http' scheme for the
        # i2p HTTP proxy -- confirm.
        i2p_proxy = random.choice(self.i2p_host)
        proxies = [rocksock.RocksockProxyFromURL('http://%s' % i2p_proxy)]
        return True, proxies, False

    # or go with tor as the first hop
    proxies = [rocksock.RocksockProxyFromURL('socks5://%s' % random.choice(self.tor_host))]

    # add 'clearnet' proxies to the chain ? (never for tor-internal names)
    if self.proxify and not destination.endswith(('onion', '.exit')):
        # get a proxy path
        path = build_path(self, path_item, path)

        # if no path could be built, give up
        if not path:
            return False, False, False

        # each hop is [address, protocol]
        for hop in path:
            proxies.append(rocksock.RocksockProxyFromURL('%s://%s' % (hop[1], hop[0])))

    return True, proxies, path
|
|
||||||
|
|
||||||
def build_path(self, path_item, path):
    """Return the list of clearnet proxies to chain for ``path_item``.

    ``self`` -- object providing ``path_len``, ``path_randomlen``,
                ``path_duration``, ``proxylist``, ``paths`` and
                ``exitcountry``.
    ``path_item`` -- key of the cached path in ``self.paths``.
    ``path`` -- path currently held by the caller; it is not consulted, a
                valid cached path is always preferred.

    Each ``self.proxylist`` entry is indexed as ``(address, country,
    protocol)``; each hop of the result is ``[address, protocol]``.

    Returns ``[]`` when no path of the requested length can be built
    (not enough proxies, or no proxy in the requested exit country).
    """
    chainlen = random.randint(self.path_len, self.path_len + self.path_randomlen)

    # not enough proxies
    # FIXME: try to get a proxylist from database
    if len(self.proxylist) < chainlen:
        return []

    # valid (non-empty, not yet expired) path already available: reuse it
    if (path_item in self.paths and
            self.paths[path_item]['path'] and
            (time.time() - self.paths[path_item]['ticks']) < self.path_duration):
        return self.paths[path_item]['path']

    # or nope, none available: build a new one from scratch
    want_exit = self.exitcountry is not None
    exit_country = str(self.exitcountry).upper()

    # one slot of the chain is reserved for the exit proxy
    if want_exit:
        chainlen -= 1

    # Split the proxy list into relay and exit candidates, keeping each
    # address only once (the previous duplicate test ran against a list
    # that was still empty and therefore never filtered anything).
    seen = set()
    relays = []
    exits = []
    for item in self.proxylist:
        if item[0] in seen:
            continue
        seen.add(item[0])
        hop = [item[0], item[2]]
        if item[1] == exit_country:
            exits.append(hop)
        else:
            relays.append(hop)

    # random.sample() raises ValueError when asked for more hops than there
    # are candidates (or for a negative count): report "no path" instead.
    if chainlen < 0 or len(relays) < chainlen:
        return []
    if want_exit and not exits:
        return []

    path = random.sample(relays, chainlen)

    # choose the exit proxy
    if want_exit:
        path.append(random.choice(exits))

    return path
|
|
||||||
|
|||||||
@@ -3,7 +3,6 @@
|
|||||||
from threading import Thread
|
from threading import Thread
|
||||||
import threading, commands
|
import threading, commands
|
||||||
import socket, time, random, sys, string, re
|
import socket, time, random, sys, string, re
|
||||||
#import sockschain as socks
|
|
||||||
import requests
|
import requests
|
||||||
#from geoip import geolite2
|
#from geoip import geolite2
|
||||||
|
|
||||||
|
|||||||
51
ppf.py
51
ppf.py
@@ -1,23 +1,16 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
import sys
|
||||||
import socket
|
|
||||||
import requests
|
import requests
|
||||||
import socks
|
|
||||||
import random, time
|
import random, time
|
||||||
import sqlite3
|
|
||||||
import re
|
import re
|
||||||
import urllib
|
import urllib
|
||||||
import threading
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import ipcalc
|
import ipcalc
|
||||||
from soup_parser import soupify
|
from soup_parser import soupify
|
||||||
from ConfigParser import SafeConfigParser
|
from ConfigParser import SafeConfigParser
|
||||||
from requests.packages.urllib3.exceptions import InsecureRequestWarning
|
from requests.packages.urllib3.exceptions import InsecureRequestWarning
|
||||||
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
|
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
|
||||||
from dns import resolver
|
|
||||||
#from selenium import webdriver
|
|
||||||
|
|
||||||
sys.path.append('./includes')
|
sys.path.append('./includes')
|
||||||
import mysqlite
|
import mysqlite
|
||||||
@@ -63,53 +56,21 @@ def import_from_file(fn, sqlite):
|
|||||||
sqlite.execute('INSERT INTO uris (added,url,check_time,error) VALUES (?,?,?,?)', (time.time(),u,0,1))
|
sqlite.execute('INSERT INTO uris (added,url,check_time,error) VALUES (?,?,?,?)', (time.time(),u,0,1))
|
||||||
sqlite.commit()
|
sqlite.commit()
|
||||||
|
|
||||||
def fetch_contents(uri, driver=None):
|
def fetch_contents(uri):
|
||||||
headers = base_header
|
headers = base_header
|
||||||
## use requests (default)
|
try: resp = requests.get(uri, timeout=45, headers=headers, verify=False, proxies=proxies)
|
||||||
if not driver:
|
except: return ''
|
||||||
try: resp = requests.get(uri, timeout=45, headers=headers, verify=False, proxies=proxies)
|
data = resp.text
|
||||||
except: return ''
|
|
||||||
data = resp.text
|
|
||||||
|
|
||||||
## phantomjs
|
|
||||||
else:
|
|
||||||
for key, value in enumerate(base_header):
|
|
||||||
capability_key = 'phantomjs.page.customHeaders.{}'.format(key)
|
|
||||||
webdriver.DesiredCapabilities.PHANTOMJS[capability_key] = value
|
|
||||||
|
|
||||||
service_args = ['--proxy=127.0.0.1:9050', '--proxy-type=socks5']
|
|
||||||
driver = webdriver.PhantomJS()
|
|
||||||
try:
|
|
||||||
driver.implicitly_wait(45)
|
|
||||||
driver.set_page_load_timeout(45)
|
|
||||||
driver.get(uri)
|
|
||||||
data = driver.page_source
|
|
||||||
|
|
||||||
except: data = ''
|
|
||||||
finally:driver.quit()
|
|
||||||
|
|
||||||
for retry_message in retry_messages:
|
for retry_message in retry_messages:
|
||||||
if retry_message in data: return ''
|
if retry_message in data: return ''
|
||||||
return data
|
|
||||||
|
|
||||||
def update_proxy_sources(sqlite, proxies, uri):
|
return data
|
||||||
for proxy in proxies:
|
|
||||||
md5sum = hashlib.md5(proxy).hexdigest()
|
|
||||||
sqlite.execute('CREATE TABLE IF NOT EXISTS "%s" (uri TEXT)' % md5sum)
|
|
||||||
sqlite.commit()
|
|
||||||
#check = [ i for i in sqlite.execute('SELECT uri FROM "%s" WHERE uri=?' % md5sum, (uri,)).fetchall() ]
|
|
||||||
check = sqlite.execute('SELECT uri FROM "%s" WHERE uri=?' % md5sum, (uri,)).fetchall()
|
|
||||||
if not len(check):
|
|
||||||
sqlite.execute('INSERT INTO "%s" (uri) VALUES(?)' % md5sum, (uri,))
|
|
||||||
sqlite.commit()
|
|
||||||
|
|
||||||
def insert_proxies(proxies, uri, sqlite):
|
def insert_proxies(proxies, uri, sqlite):
|
||||||
time_now = time.time()
|
time_now = time.time()
|
||||||
added = 0
|
added = 0
|
||||||
|
|
||||||
## very wasteful
|
|
||||||
#update_proxy_sources(sqlite, proxies, uri)
|
|
||||||
|
|
||||||
query = [ 'proxy=?' for p in proxies ]
|
query = [ 'proxy=?' for p in proxies ]
|
||||||
known = [ i[0] for i in sqlite.execute('SELECT proxy FROM proxylist WHERE %s' % ' OR '.join(query), proxies).fetchall() ]
|
known = [ i[0] for i in sqlite.execute('SELECT proxy FROM proxylist WHERE %s' % ' OR '.join(query), proxies).fetchall() ]
|
||||||
new = [ (time_now,i,3,0) for i in proxies if not i in known ]
|
new = [ (time_now,i,3,0) for i in proxies if not i in known ]
|
||||||
@@ -161,7 +122,7 @@ def proxyleech(sqlite, rows):
|
|||||||
#print('entering proxyleech...')
|
#print('entering proxyleech...')
|
||||||
|
|
||||||
for row in rows:
|
for row in rows:
|
||||||
try: content = fetch_contents(row[0], None)
|
try: content = fetch_contents(row[0])
|
||||||
except: content = ''
|
except: content = ''
|
||||||
|
|
||||||
uniques = []
|
uniques = []
|
||||||
|
|||||||
41
striphtml.py
41
striphtml.py
@@ -1,41 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
from HTMLParser import HTMLParser
|
|
||||||
import requests
|
|
||||||
import re
|
|
||||||
from selenium.webdriver.common.proxy import *
|
|
||||||
from selenium import webdriver
|
|
||||||
from selenium.webdriver.common.by import By
|
|
||||||
phantomjs_path = '/home/mickael/bin/phantomjs'
|
|
||||||
def cleanhtml(raw_html):
    """Replace every HTML tag in ``raw_html`` with ':' and collapse ':' runs.

    Turning tags into a separator keeps values that sat in neighbouring
    elements apart, so the result can be searched for ``ip:port`` pairs.
    Returns the resulting string.
    """
    # '[^>]*' instead of '.*?' so that a tag broken across several lines is
    # matched too ('.' does not match a newline by default).
    cleantext = re.sub(r'<[^>]*>', ':', raw_html)
    # adjacent tags leave runs of separators behind: squeeze them into one
    return re.sub(r'::+', ':', cleantext)
|
|
||||||
|
|
||||||
class MLStripper(HTMLParser):
    """HTML parser that keeps only the text found between tags."""

    def __init__(self):
        # Run the base initialiser rather than only reset(): on Python 3 it
        # also sets attributes (e.g. convert_charrefs) that feed() relies
        # on. Called explicitly because Python 2's HTMLParser is an
        # old-style class, where super() is not available.
        HTMLParser.__init__(self)
        # text chunks collected so far, in document order
        self.fed = []

    def handle_data(self, d):
        """Collect one chunk of text reported by the parser."""
        self.fed.append(d)

    def get_data(self):
        """Return all the text collected so far as a single string."""
        return ''.join(self.fed)
|
|
||||||
|
|
||||||
def strip_tags(html):
    """Return the text of ``html`` with all markup removed."""
    s = MLStripper()
    s.feed(html)
    # feed() may hold back trailing input it considers incomplete (for
    # instance text ending in an unterminated '&...'); close() forces the
    # parser to process what is still buffered.
    s.close()
    return s.get_data()
|
|
||||||
|
|
||||||
# Fetch one proxy-list page through the local SOCKS5 proxy with PhantomJS,
# strip its markup and print the ip:port pairs found in it.
import sys  # needed by the sys.exit() call below; it was never imported

service_args = ['--proxy=127.0.0.1:9050', '--proxy-type=socks5']
driver = webdriver.PhantomJS(phantomjs_path, service_args=service_args)
try:
    try:
        driver.get('http://www.proxz.com/proxy_list_fr_1_ext.html')
    except Exception:
        # page could not be loaded: nothing to parse, leave quietly
        sys.exit(0)
    html = driver.page_source
finally:
    # always stop the PhantomJS process, including on the early exit above
    driver.quit()

text = cleanhtml(html)
proxies = sorted(re.findall(r'[0-9]+(?:\.[0-9]+){3}:[0-9]+', text))

print(text)
print(proxies)
|
|
||||||
Reference in New Issue
Block a user