#!/usr/bin/env python2
"""Test --nobs functionality (stdlib HTML parsing without BeautifulSoup).

Forces soup_parser into its no-BeautifulSoup fallback mode, parses a small
HTML document, and checks the operations fetch.py relies on: find_all with
href filtering, item-style attribute access, and the .attrs dict.
Prints [OK]/[FAIL] per check and exits with status 1 on the first failure.
"""

import sys

OK = "[OK]"
FAIL = "[FAIL]"

# Test 1: Import soup_parser
print("Test 1: Import soup_parser")
try:
    import soup_parser
    print(" %s soup_parser imported" % OK)
except Exception as e:
    print(" %s Failed: %s" % (FAIL, e))
    sys.exit(1)

# Test 2: Check bs4 availability detection
print("Test 2: Check bs4 availability")
print(" bs4 available: %s" % soup_parser.is_available())

# Test 3: Test stdlib parser (force --nobs mode)
print("Test 3: Test stdlib parser")
soup_parser.set_nobs(True)

# Fixture: two anchors with href, one anchor without, one non-anchor tag.
html = '''
<html>
<body>
<a href="http://example.com/proxy1">Link 1</a>
<a href="http://example.com/proxy2" rel="noreferrer">Link 2</a>
<a>No href</a>
<p>Not a link</p>
</body>
</html>
'''

soup = soup_parser.soupify(html, nohtml=True)
links = soup.find_all('a', href=True)

# Only the two anchors that carry an href attribute should match.
print(" Found %d links with href" % len(links))
if len(links) == 2:
    print(" %s Correct count" % OK)
else:
    print(" %s Expected 2, got %d" % (FAIL, len(links)))
    sys.exit(1)

# Test 4: Verify link attributes
print("Test 4: Verify link attributes")
for i, link in enumerate(links):
    href = link['href']
    print(" Link %d: %s" % (i + 1, href))
    if not href.startswith('http://example.com/'):
        print(" %s Unexpected href" % FAIL)
        sys.exit(1)
print(" %s All hrefs correct" % OK)

# Test 5: Test attrs access (used by fetch.py)
print("Test 5: Test attrs dict access")
link = links[1]
if 'rel' in link.attrs:
    print(" %s rel attribute found: %s" % (OK, link.attrs['rel']))
else:
    print(" %s rel attribute missing" % FAIL)
    sys.exit(1)

print("")
print("All tests passed!")