#!/usr/bin/env python2
"""Test --nobs functionality (stdlib HTML parsing without BeautifulSoup).

Forces soup_parser into its no-BeautifulSoup fallback mode, parses a small
HTML document, and checks the operations fetch.py relies on: find_all with
href filtering, item-style attribute access, and the .attrs dict.
Prints [OK]/[FAIL] per check and exits with status 1 on the first failure.
"""

import sys

OK = "[OK]"
FAIL = "[FAIL]"

# Test 1: Import soup_parser
print("Test 1: Import soup_parser")
try:
    import soup_parser
    print(" %s soup_parser imported" % OK)
except Exception as e:
    print(" %s Failed: %s" % (FAIL, e))
    sys.exit(1)

# Test 2: Check bs4 availability detection
print("Test 2: Check bs4 availability")
print(" bs4 available: %s" % soup_parser.is_available())

# Test 3: Test stdlib parser (force --nobs mode)
print("Test 3: Test stdlib parser")
soup_parser.set_nobs(True)

# Fixture: two anchors with href, one anchor without, one non-anchor tag.
html = '''
<html>
<body>
<a href="http://example.com/proxy1">Link 1</a>
<a href="http://example.com/proxy2" rel="noreferrer">Link 2</a>
<a>No href</a>
<p>Not a link</p>
</body>
</html>
'''

soup = soup_parser.soupify(html, nohtml=True)
links = soup.find_all('a', href=True)

# Only the two anchors that carry an href attribute should match.
print(" Found %d links with href" % len(links))
if len(links) == 2:
    print(" %s Correct count" % OK)
else:
    print(" %s Expected 2, got %d" % (FAIL, len(links)))
    sys.exit(1)

# Test 4: Verify link attributes
print("Test 4: Verify link attributes")
for i, link in enumerate(links):
    href = link['href']
    print(" Link %d: %s" % (i + 1, href))
    if not href.startswith('http://example.com/'):
        print(" %s Unexpected href" % FAIL)
        sys.exit(1)
print(" %s All hrefs correct" % OK)

# Test 5: Test attrs access (used by fetch.py)
print("Test 5: Test attrs dict access")
link = links[1]
if 'rel' in link.attrs:
    print(" %s rel attribute found: %s" % (OK, link.attrs['rel']))
else:
    print(" %s rel attribute missing" % FAIL)
    sys.exit(1)

print("")
print("All tests passed!")