42 lines
1.1 KiB
Python
42 lines
1.1 KiB
Python
#!/usr/bin/env python
|
|
|
|
import re
import sys

import requests
from selenium.webdriver.common.proxy import *
from selenium import webdriver
from selenium.webdriver.common.by import By

try:
    from HTMLParser import HTMLParser  # Python 2
except ImportError:
    from html.parser import HTMLParser  # Python 3
|
|
# Filesystem location of the PhantomJS executable; it is handed to
# webdriver.PhantomJS() when the driver is created further down.
# NOTE(review): this is a user-specific absolute path -- adjust per machine.
phantomjs_path = '/home/mickael/bin/phantomjs'
|
|
def cleanhtml(raw_html):
    """Replace every HTML tag in *raw_html* with a single ``:`` separator.

    Each ``<...>`` tag is turned into ``:`` and then every run of two or
    more colons (including colons that were already in the text) is
    collapsed into one.

    Args:
        raw_html: Markup to flatten.

    Returns:
        The input text with tags replaced by ``:`` separators.
    """
    # ``[^>]*`` (unlike ``.*?``) also crosses newlines, so a tag whose
    # attributes are wrapped over several lines is replaced as well.
    without_tags = re.sub(r'<[^>]*>', ':', raw_html)
    return re.sub(r'::+', ':', without_tags)
|
|
|
|
class MLStripper(HTMLParser):
    """HTML parser that throws markup away and keeps only the text nodes.

    Feed it markup with ``feed()`` and read the accumulated text back with
    ``get_data()``.
    """

    def __init__(self):
        # Run the base initializer instead of only reset(): on Python 3 it
        # sets parser state (convert_charrefs) that feed() needs, and it
        # calls reset() itself.  It is invoked explicitly rather than via
        # super() because HTMLParser is an old-style class on Python 2.
        HTMLParser.__init__(self)
        # Text fragments in the order the parser reported them.
        self.fed = []

    def handle_data(self, d):
        """Record one chunk of text found between tags."""
        self.fed.append(d)

    def get_data(self):
        """Return all text collected so far as a single string."""
        return ''.join(self.fed)
|
|
|
|
def strip_tags(html):
    """Return the text of *html* with all markup removed.

    The markup is run through a fresh ``MLStripper`` and the text it
    collected is returned as one string.
    """
    stripper = MLStripper()
    stripper.feed(html)
    plain_text = stripper.get_data()
    return plain_text
|
|
|
|
# Send PhantomJS traffic through the SOCKS5 proxy listening on
# 127.0.0.1:9050 (presumably a local Tor client -- confirm).
service_args = ['--proxy=127.0.0.1:9050', '--proxy-type=socks5']
driver = webdriver.PhantomJS(phantomjs_path, service_args=service_args)
try:
    try:
        driver.get('http://www.proxz.com/proxy_list_fr_1_ext.html')
    except Exception:
        # Best effort: if the page cannot be loaded, end quietly with
        # status 0.  Exception (not a bare except) lets Ctrl-C propagate.
        sys.exit(0)
    html = driver.page_source
finally:
    # Always shut the browser down, including on the failure/exit paths,
    # so no PhantomJS process is left running.
    driver.quit()

# Tags become ':' separators, so an address cell followed by a port cell
# reads as "address:port" in the flattened text.
text = cleanhtml(html)
# Collect every dotted-quad followed by ":<digits>", in sorted order.
proxies = sorted(re.findall(r'[0-9]+(?:\.[0-9]+){3}:[0-9]+', text))

print(text)
print(proxies)
|