fetch: add robust proxy string validation
This commit is contained in:
@@ -289,7 +289,7 @@ PPF (Proxy Fetcher) is a Python 2 proxy scraping and validation framework design
|
|||||||
|------|-------------|------|
|
|------|-------------|------|
|
||||||
| ~~Dual _known_proxies~~ | ~~ppf.py and fetch.py maintain separate caches~~ | **Resolved** |
|
| ~~Dual _known_proxies~~ | ~~ppf.py and fetch.py maintain separate caches~~ | **Resolved** |
|
||||||
| Global config in fetch.py | set_config() pattern is fragile | Low - works but not clean |
|
| Global config in fetch.py | set_config() pattern is fragile | Low - works but not clean |
|
||||||
| No input validation | Proxy strings parsed without validation | Medium - could crash on bad data |
|
| ~~No input validation~~ | ~~Proxy strings parsed without validation~~ | **Resolved** |
|
||||||
| ~~Silent exception catching~~ | ~~Some except: pass patterns hide errors~~ | **Resolved** |
|
| ~~Silent exception catching~~ | ~~Some except: pass patterns hide errors~~ | **Resolved** |
|
||||||
| ~~Hardcoded timeouts~~ | ~~Various timeout values scattered in code~~ | **Resolved** |
|
| ~~Hardcoded timeouts~~ | ~~Various timeout values scattered in code~~ | **Resolved** |
|
||||||
|
|
||||||
|
|||||||
89
fetch.py
89
fetch.py
@@ -90,24 +90,85 @@ def _fetch_contents(url, head = False, proxy=None):
|
|||||||
return res
|
return res
|
||||||
|
|
||||||
def valid_port(port):
    """Return True if ``port`` is a usable port number.

    Valid ports are 1 through 65535 inclusive. Port 0 and anything
    negative or above 65535 is rejected.

    ``port`` is expected to be an integer; callers parsing text must
    convert it first.
    """
    # Both bounds are inclusive: 65535 is the highest legal port and must
    # be accepted (a strict ``< 65535`` comparison would wrongly drop it).
    return 1 <= port <= 65535
|
||||||
|
|
||||||
|
|
||||||
def is_usable_proxy(proxy):
    """Return True if ``proxy`` is a well-formed, publicly routable ``ip:port``.

    The string is split on its last ':'. The port must pass ``valid_port``
    and the host must be a dotted-quad IPv4 address made of exactly four
    decimal octets in the range 0-255. Malformed input yields False rather
    than an exception.

    Rejects:
      - Non-string input and strings without a ':' separator
      - Port or octet fields that are empty or contain anything other
        than ASCII digits (whitespace, signs, letters)
      - Invalid port (outside 1-65535)
      - Invalid IP octets (>255) or a wrong number of octets
      - "This network": 0.0.0.0/8 (includes the unspecified 0.0.0.0)
      - Private ranges: 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16
      - Loopback: 127.0.0.0/8
      - Link-local: 169.254.0.0/16
      - CGNAT: 100.64.0.0/10
      - Multicast: 224.0.0.0/4
      - Reserved: 240.0.0.0/4
    """
    try:
        ip, port_str = proxy.rsplit(':', 1)
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: not a text string (None, int, bytes...).
        # ValueError: no ':' present, so there is nothing to unpack into port.
        return False

    port = _parse_decimal(port_str)
    if port is None or not valid_port(port):
        return False

    fields = ip.split('.')
    if len(fields) != 4:
        return False

    octets = [_parse_decimal(field) for field in fields]
    # Negative values cannot occur: a '-' sign already fails the digit check.
    if any(value is None or value > 255 for value in octets):
        return False

    A, B = octets[0], octets[1]

    # 0.0.0.0/8 ("this network"), private 10.0.0.0/8, loopback 127.0.0.0/8
    if A in (0, 10, 127):
        return False

    # Private 172.16.0.0/12 (172.16.0.0 - 172.31.255.255)
    if A == 172 and 16 <= B <= 31:
        return False

    # Private 192.168.0.0/16
    if A == 192 and B == 168:
        return False

    # Link-local 169.254.0.0/16
    if A == 169 and B == 254:
        return False

    # CGNAT 100.64.0.0/10 (100.64.0.0 - 100.127.255.255)
    if A == 100 and 64 <= B <= 127:
        return False

    # Multicast 224.0.0.0/4 (224-239) and reserved 240.0.0.0/4 (240-255)
    if A >= 224:
        return False

    return True


def _parse_decimal(text):
    """Return ``text`` as an int if it is one or more ASCII digits, else None.

    ``int()`` alone is too lenient for validation: it tolerates surrounding
    whitespace and a leading sign, so the characters are checked first.
    """
    if not text or not all(ch in '0123456789' for ch in text):
        return None
    try:
        return int(text)
    except ValueError:
        # Defensive: e.g. interpreter limits on extremely long digit strings.
        return None
|
||||||
|
|
||||||
_known_proxies = {}
|
_known_proxies = {}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user