ppf: strip extracted uris
This commit is contained in:
1
ppf.py
1
ppf.py
@@ -82,6 +82,7 @@ def extract_urls(html, url):
|
|||||||
|
|
||||||
for a in soup.find_all('a', href=True):
|
for a in soup.find_all('a', href=True):
|
||||||
item = a['href'].encode('utf-8') if isinstance(a['href'], unicode) else a['href']
|
item = a['href'].encode('utf-8') if isinstance(a['href'], unicode) else a['href']
|
||||||
|
item = item.strip()
|
||||||
|
|
||||||
if is_bad_url(item):
|
if is_bad_url(item):
|
||||||
continue
|
continue
|
||||||
|
|||||||
Reference in New Issue
Block a user