#!/usr/bin/env python2
# test --nobs functionality (stdlib HTML parsing without BeautifulSoup)
#
# Runs five sequential checks against soup_parser. The script exits with
# status 1 at the first failing check and prints "All tests passed!" when
# every check succeeds.

import sys

OK = "[OK]"
FAIL = "[FAIL]"

# Test 1: Import soup_parser
print("Test 1: Import soup_parser")
try:
    import soup_parser
except Exception as e:
    # Any import-time error (not only ImportError) makes the remaining
    # checks impossible, so report it and stop.
    print(" %s Failed: %s" % (FAIL, e))
    sys.exit(1)
else:
    print(" %s soup_parser imported" % OK)

# Test 2: Check bs4 availability detection
# Informational only: the value is printed, not asserted.
print("Test 2: Check bs4 availability")
print(" bs4 available: %s" % soup_parser.is_available())

# Test 3: Test stdlib parser (force --nobs mode)
print("Test 3: Test stdlib parser")
soup_parser.set_nobs(True)

# Fixture covering every case the checks below rely on:
#   - two anchors with an href under http://example.com/ (Tests 3 and 4)
#   - the second anchor also carries a rel attribute (Test 5)
#   - one anchor without href, which find_all(..., href=True) must skip
#   - one non-anchor element, which must not be reported as a link
# Only the presence of rel is checked, never its value.
html = '''
<a href="http://example.com/1">Link 1</a>
<a href="http://example.com/2" rel="nofollow">Link 2</a>
<a>No href</a>
<p>Not a link</p>
'''

soup = soup_parser.soupify(html, nohtml=True)
links = soup.find_all('a', href=True)
print(" Found %d links with href" % len(links))
if len(links) == 2:
    print(" %s Correct count" % OK)
else:
    print(" %s Expected 2, got %d" % (FAIL, len(links)))
    sys.exit(1)

# Test 4: Verify link attributes
print("Test 4: Verify link attributes")
for i, link in enumerate(links):
    href = link['href']
    print(" Link %d: %s" % (i + 1, href))
    if not href.startswith('http://example.com/'):
        print(" %s Unexpected href" % FAIL)
        sys.exit(1)
print(" %s All hrefs correct" % OK)

# Test 5: Test attrs access (used by fetch.py)
print("Test 5: Test attrs dict access")
link = links[1]  # the second anchor is the one that carries rel
if 'rel' in link.attrs:
    print(" %s rel attribute found: %s" % (OK, link.attrs['rel']))
else:
    print(" %s rel attribute missing" % FAIL)
    sys.exit(1)

print("")
print("All tests passed!")