From 2ea7eb41b739b6c8e182ab14998924cacb759e60 Mon Sep 17 00:00:00 2001
From: Username
Date: Sun, 22 Feb 2026 13:50:34 +0100
Subject: [PATCH] tests: add extraction short-circuit and integration tests

Cover short-circuit guards, table/JSON/hint extraction, and full
extract_proxies() integration (82 tests, all passing).
---
 tests/test_fetch.py | 192 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 192 insertions(+)

diff --git a/tests/test_fetch.py b/tests/test_fetch.py
index 7f1b16b..2b972f8 100644
--- a/tests/test_fetch.py
+++ b/tests/test_fetch.py
@@ -359,6 +359,198 @@ class TestExtractAuthProxies:
         assert fetch.extract_auth_proxies('just some text') == []


+class TestExtractAuthProxiesShortCircuit:
+    """Tests for extract_auth_proxies() short-circuit on missing @."""
+
+    def test_no_at_sign_returns_empty(self):
+        """Content without @ skips regex entirely."""
+        content = '1.2.3.4:8080 socks5://5.6.7.8:1080 plain text'
+        assert fetch.extract_auth_proxies(content) == []
+
+    def test_at_sign_still_extracts(self):
+        """Content with @ still finds auth proxies."""
+        content = 'user:pass@1.2.3.4:8080'
+        result = fetch.extract_auth_proxies(content)
+        assert len(result) == 1
+        assert result[0][0] == 'user:pass@1.2.3.4:8080'
+
+    def test_at_sign_no_match_returns_empty(self):
+        """Content with @ but no auth proxy pattern returns empty."""
+        content = 'email@example.com has no proxy'
+        assert fetch.extract_auth_proxies(content) == []
+
+
+class TestExtractProxiesFromTable:
+    """Tests for extract_proxies_from_table() with precompiled regexes."""
+
+    def test_no_table_returns_empty(self):
+        """Plain text without <table> returns empty."""
+        content = '1.2.3.4:8080\n5.6.7.8:3128\n'
+        assert fetch.extract_proxies_from_table(content) == []
+
+    def test_simple_table(self):
+        """Basic HTML table with IP/Port columns is parsed."""
+        content = '''
+        <table>
+            <tr><th>IP</th><th>Port</th><th>Type</th></tr>
+            <tr><td>1.2.3.4</td><td>8080</td><td>HTTP</td></tr>
+            <tr><td>5.6.7.8</td><td>1080</td><td>SOCKS5</td></tr>
+        </table>
+        '''
+        result = fetch.extract_proxies_from_table(content)
+        assert len(result) == 2
+        addrs = [r[0] for r in result]
+        assert '1.2.3.4:8080' in addrs
+        assert '5.6.7.8:1080' in addrs
+
+    def test_uppercase_table_tag(self):
+        """<TABLE> (uppercase) is also detected."""
+        content = '''
+        <TABLE>
+            <tr><th>IP</th><th>Port</th></tr>
+            <tr><td>1.2.3.4</td><td>8080</td></tr>
+        </TABLE>
+        '''
+        result = fetch.extract_proxies_from_table(content)
+        assert len(result) == 1
+
+    def test_empty_table(self):
+        """Table with headers but no data rows returns empty."""
+        content = '''
+        <table>
+            <tr><th>IP</th><th>Port</th></tr>
+        </table>
+        '''
+        result = fetch.extract_proxies_from_table(content)
+        assert result == []
+
+
+class TestExtractProxiesFromJson:
+    """Tests for extract_proxies_from_json() short-circuit."""
+
+    def test_no_braces_returns_empty(self):
+        """Content without { or [ skips JSON parsing."""
+        content = '1.2.3.4:8080\n5.6.7.8:3128\n'
+        assert fetch.extract_proxies_from_json(content) == []
+
+    def test_json_array_of_objects(self):
+        """JSON array with ip/port objects is parsed."""
+        content = '[{"ip": "1.2.3.4", "port": 8080}]'
+        result = fetch.extract_proxies_from_json(content)
+        assert len(result) >= 1
+        addrs = [r[0] for r in result]
+        assert '1.2.3.4:8080' in addrs
+
+    def test_json_array_of_strings(self):
+        """JSON array of ip:port strings is parsed."""
+        content = '["1.2.3.4:8080", "5.6.7.8:3128"]'
+        result = fetch.extract_proxies_from_json(content)
+        addrs = [r[0] for r in result]
+        assert '1.2.3.4:8080' in addrs
+        assert '5.6.7.8:3128' in addrs
+
+    def test_plain_html_skips_json(self):
+        """HTML without JSON delimiters returns empty."""
+        content = '<html><body>1.2.3.4:8080</body></html>'
+        # HTML has < and > but this function checks for { and [
+        # The < > chars won't trigger JSON parsing
+        result = fetch.extract_proxies_from_json(content)
+        # May or may not find anything depending on HTML structure
+        # but should not crash
+        assert isinstance(result, list)
+
+
+class TestExtractProxiesWithHints:
+    """Tests for extract_proxies_with_hints()."""
+
+    def test_proto_before_ip(self):
+        """Protocol keyword before IP:PORT is detected."""
+        content = 'socks5 1.2.3.4:8080'
+        result = fetch.extract_proxies_with_hints(content)
+        assert '1.2.3.4:8080' in result
+        assert result['1.2.3.4:8080'] == 'socks5'
+
+    def test_proto_after_ip(self):
+        """Protocol keyword after IP:PORT is detected."""
+        content = '1.2.3.4:8080 socks5'
+        result = fetch.extract_proxies_with_hints(content)
+        assert '1.2.3.4:8080' in result
+
+    def test_no_hints_returns_empty(self):
+        """Plain IP:PORT without protocol hints returns empty."""
+        content = '1.2.3.4:8080'
+        result = fetch.extract_proxies_with_hints(content)
+        assert result == {}
+
+
+class TestExtractProxiesIntegration:
+    """Integration tests for extract_proxies() combining all extractors."""
+
+    def test_plain_text_proxy_list(self):
+        """Plain text IP:PORT list extracts correctly."""
+        content = '1.2.3.4:8080\n5.6.7.8:3128\n9.10.11.12:1080\n'
+        result = fetch.extract_proxies(content, filter_known=False)
+        addrs = [r[0] for r in result]
+        assert '1.2.3.4:8080' in addrs
+        assert '5.6.7.8:3128' in addrs
+        assert '9.10.11.12:1080' in addrs
+
+    def test_auth_proxies_extracted(self):
+        """Auth proxies found in mixed content."""
+        content = 'user:pass@1.2.3.4:8080\n5.6.7.8:3128\n'
+        result = fetch.extract_proxies(content, filter_known=False)
+        addrs = [r[0] for r in result]
+        assert 'user:pass@1.2.3.4:8080' in addrs
+        assert '5.6.7.8:3128' in addrs
+
+    def test_html_table_extraction(self):
+        """Proxies extracted from HTML table."""
+        content = '''
+        <table>
+            <tr><th>IP</th><th>Port</th></tr>
+            <tr><td>1.2.3.4</td><td>8080</td></tr>
+        </table>
+        '''
+        result = fetch.extract_proxies(content, filter_known=False)
+        addrs = [r[0] for r in result]
+        assert '1.2.3.4:8080' in addrs
+
+    def test_json_extraction(self):
+        """Proxies extracted from JSON content."""
+        content = '[{"ip": "1.2.3.4", "port": 8080}]'
+        result = fetch.extract_proxies(content, filter_known=False)
+        addrs = [r[0] for r in result]
+        assert '1.2.3.4:8080' in addrs
+
+    def test_empty_content(self):
+        """Empty content returns no proxies."""
+        result = fetch.extract_proxies('', filter_known=False)
+        assert result == []
+
+    def test_private_ips_filtered(self):
+        """Private IPs are not returned."""
+        content = '10.0.0.1:8080\n192.168.1.1:3128\n1.2.3.4:8080\n'
+        result = fetch.extract_proxies(content, filter_known=False)
+        addrs = [r[0] for r in result]
+        assert '10.0.0.1:8080' not in addrs
+        assert '192.168.1.1:3128' not in addrs
+        assert '1.2.3.4:8080' in addrs
+
+    def test_proto_from_hints(self):
+        """Protocol hints are picked up."""
+        content = 'socks5 1.2.3.4:8080\n'
+        result = fetch.extract_proxies(content, filter_known=False)
+        protos = {r[0]: r[1] for r in result}
+        assert protos.get('1.2.3.4:8080') == 'socks5'
+
+    def test_proto_from_arg(self):
+        """Fallback proto from argument is used."""
+        content = '1.2.3.4:8080\n'
+        result = fetch.extract_proxies(content, filter_known=False, proto='socks4')
+        protos = {r[0]: r[1] for r in result}
+        assert protos.get('1.2.3.4:8080') == 'socks4'
+
+
 class TestConfidenceScoring:
     """Tests for confidence score constants."""