cleansing
This commit is contained in:
51
ppf.py
51
ppf.py
@@ -1,23 +1,16 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import socket
|
||||
import requests
|
||||
import socks
|
||||
import random, time
|
||||
import sqlite3
|
||||
import re
|
||||
import urllib
|
||||
import threading
|
||||
import hashlib
|
||||
import ipcalc
|
||||
from soup_parser import soupify
|
||||
from ConfigParser import SafeConfigParser
|
||||
from requests.packages.urllib3.exceptions import InsecureRequestWarning
|
||||
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
|
||||
from dns import resolver
|
||||
#from selenium import webdriver
|
||||
|
||||
sys.path.append('./includes')
|
||||
import mysqlite
|
||||
@@ -63,53 +56,21 @@ def import_from_file(fn, sqlite):
|
||||
sqlite.execute('INSERT INTO uris (added,url,check_time,error) VALUES (?,?,?,?)', (time.time(),u,0,1))
|
||||
sqlite.commit()
|
||||
|
||||
def fetch_contents(uri):
    """Fetch the body of *uri* via requests through the configured proxies.

    Returns the response text, or '' when the request fails or the page
    contains one of the known retry/rate-limit markers (``retry_messages``).
    """
    headers = base_header  # module-level default header set

    try:
        # verify=False: proxy-list sites frequently have broken TLS;
        # `proxies` (module-level) routes the request through the SOCKS proxy.
        resp = requests.get(uri, timeout=45, headers=headers,
                            verify=False, proxies=proxies)
    except requests.exceptions.RequestException:
        # Bug fix: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit. requests wraps all transport
        # errors in RequestException, so this is the right boundary.
        return ''
    data = resp.text

    # Pages that ask us to come back later count as failed fetches.
    for retry_message in retry_messages:
        if retry_message in data:
            return ''
    return data
|
||||
|
||||
def update_proxy_sources(sqlite, proxies, uri):
    """Record that *uri* was a scrape source for each proxy in *proxies*.

    Creates one table per proxy, named by the md5 hex digest of the proxy
    string (the raw "host:port" text is not a safe SQL identifier), and
    stores each distinct source URI in it at most once.

    :param sqlite:  open sqlite3 connection (has .execute/.commit)
    :param proxies: iterable of proxy strings
    :param uri:     the page the proxies were scraped from
    :returns: None
    """
    for proxy in proxies:
        # md5 digest doubles as a collision-safe, quoting-safe table name.
        md5sum = hashlib.md5(proxy).hexdigest()
        sqlite.execute('CREATE TABLE IF NOT EXISTS "%s" (uri TEXT)' % md5sum)
        sqlite.commit()
        check = sqlite.execute('SELECT uri FROM "%s" WHERE uri=?' % md5sum,
                               (uri,)).fetchall()
        # Insert only when this source URI is new for the proxy.
        if not check:
            sqlite.execute('INSERT INTO "%s" (uri) VALUES(?)' % md5sum, (uri,))
            sqlite.commit()
    # Bug fix: the original ended with `return data`, but `data` is never
    # assigned here, so every call raised NameError. The function is used
    # for its side effects only; return None implicitly.
|
||||
|
||||
def insert_proxies(proxies, uri, sqlite):
|
||||
time_now = time.time()
|
||||
added = 0
|
||||
|
||||
## very wasteful
|
||||
#update_proxy_sources(sqlite, proxies, uri)
|
||||
|
||||
query = [ 'proxy=?' for p in proxies ]
|
||||
known = [ i[0] for i in sqlite.execute('SELECT proxy FROM proxylist WHERE %s' % ' OR '.join(query), proxies).fetchall() ]
|
||||
new = [ (time_now,i,3,0) for i in proxies if not i in known ]
|
||||
@@ -161,7 +122,7 @@ def proxyleech(sqlite, rows):
|
||||
#print('entering proxyleech...')
|
||||
|
||||
for row in rows:
|
||||
try: content = fetch_contents(row[0], None)
|
||||
try: content = fetch_contents(row[0])
|
||||
except: content = ''
|
||||
|
||||
uniques = []
|
||||
|
||||
Reference in New Issue
Block a user