diff --git a/ROADMAP.md b/ROADMAP.md
index 978a4f3..3103b2b 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -289,7 +289,7 @@ PPF (Proxy Fetcher) is a Python 2 proxy scraping and validation framework design
 |------|-------------|------|
 | ~~Dual _known_proxies~~ | ~~ppf.py and fetch.py maintain separate caches~~ | **Resolved** |
 | Global config in fetch.py | set_config() pattern is fragile | Low - works but not clean |
-| No input validation | Proxy strings parsed without validation | Medium - could crash on bad data |
+| ~~No input validation~~ | ~~Proxy strings parsed without validation~~ | **Resolved** |
 | ~~Silent exception catching~~ | ~~Some except: pass patterns hide errors~~ | **Resolved** |
 | ~~Hardcoded timeouts~~ | ~~Various timeout values scattered in code~~ | **Resolved** |
 
diff --git a/fetch.py b/fetch.py
index b93e15c..6e59b3b 100644
--- a/fetch.py
+++ b/fetch.py
@@ -90,24 +90,92 @@ def _fetch_contents(url, head = False, proxy=None):
     return res
 
 def valid_port(port):
-    return port > 0 and port < 65535
+    """Check if port number is valid (1-65535)."""
+    return port >= 1 and port <= 65535
+
 
 def is_usable_proxy(proxy):
-    ip, port = proxy.split(':')
-    if not valid_port(int(port)): return False
+    """Validate proxy string format and reject unusable addresses.
 
-    octets = ip.split('.')
-    A = int(octets[0])
-    B = int(octets[1])
-    C = int(octets[2])
-    D = int(octets[3])
+    Returns True only for a well-formed "ip:port" string whose address
+    is outside the ranges listed below. Malformed or non-string input
+    (e.g. None) yields False instead of raising.
+
+    Rejects:
+    - Malformed strings (not ip:port format, non-digit characters)
+    - Invalid port (<1 or >65535)
+    - Invalid IP octets (>255)
+    - Private ranges: 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16
+    - Loopback: 127.0.0.0/8
+    - Link-local: 169.254.0.0/16
+    - CGNAT: 100.64.0.0/10
+    - Multicast: 224.0.0.0/4
+    - Reserved: 240.0.0.0/4
+    - "This network": 0.0.0.0/8
+    """
+    try:
+        if ':' not in proxy:
+            return False
 
-    if (A < 1 or A > 254 or \
-        B > 255 or C > 255 or D > 255) or \
-        (A == 10 or A == 127) or \
-        (A == 192 and B == 168) or \
-        (A == 172 and B >= 16 and B <= 31): return False
-    return True
+        ip, port_str = proxy.rsplit(':', 1)
+        # int() tolerates whitespace and signs; require plain digits
+        if not port_str.isdigit():
+            return False
+        port = int(port_str)
+
+        if not valid_port(port):
+            return False
+
+        octets = ip.split('.')
+        if len(octets) != 4 or not all(o.isdigit() for o in octets):
+            return False
+
+        A, B, C, D = [int(o) for o in octets]
+
+        # Validate octet ranges
+        if any(o < 0 or o > 255 for o in (A, B, C, D)):
+            return False
+
+        # Reject first octet 0 (0.0.0.0/8 - unspecified/invalid)
+        if A == 0:
+            return False
+
+        # Reject loopback (127.0.0.0/8)
+        if A == 127:
+            return False
+
+        # Reject private 10.0.0.0/8
+        if A == 10:
+            return False
+
+        # Reject private 172.16.0.0/12
+        if A == 172 and 16 <= B <= 31:
+            return False
+
+        # Reject private 192.168.0.0/16
+        if A == 192 and B == 168:
+            return False
+
+        # Reject link-local 169.254.0.0/16
+        if A == 169 and B == 254:
+            return False
+
+        # Reject CGNAT 100.64.0.0/10 (100.64.0.0 - 100.127.255.255)
+        if A == 100 and 64 <= B <= 127:
+            return False
+
+        # Reject multicast 224.0.0.0/4 (224-239.x.x.x)
+        if 224 <= A <= 239:
+            return False
+
+        # Reject reserved/future 240.0.0.0/4 (240-255.x.x.x)
+        if A >= 240:
+            return False
+
+        return True
+
+    except (TypeError, ValueError, AttributeError, IndexError):
+        return False
 
 _known_proxies = {}
 