diff --git a/ppf.py b/ppf.py index 0eada2f..a23cbf8 100755 --- a/ppf.py +++ b/ppf.py @@ -84,13 +84,13 @@ def extract_urls(html, url): item = a['href'].encode('utf-8') if isinstance(a['href'], unicode) else a['href'] item = item.strip() - if is_bad_url(item): - continue - elif item.startswith('www.'): + if item.startswith('www.'): item = 'http://%s' % item elif not item.startswith('http'): if not item.startswith('/'): item = '/%s' % item item = '%s://%s%s' % (proto,domain,item) + elif is_bad_url(item): + continue if not item in urls: urls.append(item)