Check whether a URL is bad *after* building the full URL
This commit is contained in:
6
ppf.py
6
ppf.py
@@ -84,13 +84,13 @@ def extract_urls(html, url):
|
||||
item = a['href'].encode('utf-8') if isinstance(a['href'], unicode) else a['href']
|
||||
item = item.strip()
|
||||
|
||||
if is_bad_url(item):
|
||||
continue
|
||||
elif item.startswith('www.'):
|
||||
if item.startswith('www.'):
|
||||
item = 'http://%s' % item
|
||||
elif not item.startswith('http'):
|
||||
if not item.startswith('/'): item = '/%s' % item
|
||||
item = '%s://%s%s' % (proto,domain,item)
|
||||
elif is_bad_url(item):
|
||||
continue
|
||||
|
||||
if not item in urls: urls.append(item)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user