add test infrastructure for --nobs
This commit is contained in:
@@ -2,8 +2,11 @@ FROM python:2.7-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install dependencies. A failed install is tolerated (note the `|| true` on the RUN below) because bs4 is optional and can be skipped with --nobs.
|
||||
COPY requirements.txt .
|
||||
RUN pip install -r requirements.txt || true
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN pip install -r requirements.txt
|
||||
|
||||
CMD ["python", "ppf.py"]
|
||||
# Default command: byte-compile the main modules with py_compile as a syntax check.
|
||||
CMD ["python", "-m", "py_compile", "ppf.py", "soup_parser.py", "config.py", "fetch.py"]
|
||||
68
test_nobs.py
Normal file
68
test_nobs.py
Normal file
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Smoke test for --nobs mode (stdlib HTML parsing without BeautifulSoup).

Runs five sequential checks against ``soup_parser`` and prints one status
line per step.  The first failing check prints a ``[FAIL]`` line and ends
the process with exit status 1; otherwise "All tests passed!" is printed.
"""

import sys

OK = "[OK]"
FAIL = "[FAIL]"


def _abort(message):
    """Print *message* and terminate the script with exit status 1."""
    print(message)
    sys.exit(1)


# Test 1: the parser module must be importable at all.
print("Test 1: Import soup_parser")
try:
    import soup_parser
    print(" %s soup_parser imported" % OK)
except Exception as err:
    # Any import-time failure is reported in the same format as the
    # other checks instead of surfacing as a raw traceback.
    _abort(" %s Failed: %s" % (FAIL, err))

# Test 2: informational only -- report what soup_parser says about bs4.
print("Test 2: Check bs4 availability")
bs4_present = soup_parser.is_available()
print(" bs4 available: %s" % bs4_present)

# Test 3: switch on --nobs mode and parse a small document.
print("Test 3: Test stdlib parser")
soup_parser.set_nobs(True)

# Fixture: two anchors with an href, one anchor without, one non-anchor.
html = '''
<html>
<body>
<a href="http://example.com/proxy1">Link 1</a>
<a href="http://example.com/proxy2" rel="noreferrer">Link 2</a>
<a>No href</a>
<p>Not a link</p>
</body>
</html>
'''

soup = soup_parser.soupify(html, nohtml=True)
links = soup.find_all('a', href=True)
link_count = len(links)

print(" Found %d links with href" % link_count)
if link_count != 2:
    _abort(" %s Expected 2, got %d" % (FAIL, link_count))
print(" %s Correct count" % OK)

# Test 4: every returned anchor must expose its href via item access.
print("Test 4: Verify link attributes")
for position, anchor in enumerate(links, 1):
    target = anchor['href']
    print(" Link %d: %s" % (position, target))
    if not target.startswith('http://example.com/'):
        _abort(" %s Unexpected href" % FAIL)
print(" %s All hrefs correct" % OK)

# Test 5: the .attrs mapping must be usable (used by fetch.py).
print("Test 5: Test attrs dict access")
second_link = links[1]  # the only fixture anchor carrying a rel attribute
if 'rel' not in second_link.attrs:
    _abort(" %s rel attribute missing" % FAIL)
print(" %s rel attribute found: %s" % (OK, second_link.attrs['rel']))

print("")
print("All tests passed!")
|
||||
Reference in New Issue
Block a user