docs: update roadmap and task tracking
- README: update feature list
- ROADMAP: add completed features, update priorities
- TODO: mark completed tasks, add new items
- config.ini.sample: update example values
- http2: minor cleanup
This commit is contained in:
11
README.md
11
README.md
@@ -201,7 +201,7 @@ stale_count INT -- checks without new proxies
|
|||||||
|
|
||||||
```ini
|
```ini
|
||||||
[Unit]
|
[Unit]
|
||||||
Description=PPF Proxy Validator
|
Description=PPF Proxy Fetcher
|
||||||
After=network-online.target tor.service
|
After=network-online.target tor.service
|
||||||
Wants=network-online.target
|
Wants=network-online.target
|
||||||
|
|
||||||
@@ -209,7 +209,8 @@ Wants=network-online.target
|
|||||||
Type=simple
|
Type=simple
|
||||||
User=ppf
|
User=ppf
|
||||||
WorkingDirectory=/opt/ppf
|
WorkingDirectory=/opt/ppf
|
||||||
ExecStart=/usr/bin/python2 proxywatchd.py
|
# ppf.py is the main entry point (runs harvester + validator)
|
||||||
|
ExecStart=/usr/bin/python2 ppf.py
|
||||||
Restart=on-failure
|
Restart=on-failure
|
||||||
RestartSec=30
|
RestartSec=30
|
||||||
|
|
||||||
@@ -224,15 +225,19 @@ WantedBy=multi-user.target
|
|||||||
podman build -t ppf:latest .
|
podman build -t ppf:latest .
|
||||||
|
|
||||||
# Run with persistent storage
|
# Run with persistent storage
|
||||||
|
# IMPORTANT: Use ppf.py as entry point (runs both harvester + validator)
|
||||||
podman run -d --name ppf \
|
podman run -d --name ppf \
|
||||||
|
--network=host \
|
||||||
-v ./data:/app/data:Z \
|
-v ./data:/app/data:Z \
|
||||||
-v ./config.ini:/app/config.ini:ro \
|
-v ./config.ini:/app/config.ini:ro \
|
||||||
ppf:latest python proxywatchd.py
|
ppf:latest python ppf.py
|
||||||
|
|
||||||
# Generate systemd unit
|
# Generate systemd unit
|
||||||
podman generate systemd --name ppf --files --new
|
podman generate systemd --name ppf --files --new
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Note: `--network=host` is required for Tor access at 127.0.0.1:9050.
|
||||||
|
|
||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
||||||
### Low Success Rate
|
### Low Success Rate
|
||||||
|
|||||||
19
ROADMAP.md
19
ROADMAP.md
@@ -187,7 +187,7 @@ PPF (Proxy Fetcher) is a Python 2 proxy scraping and validation framework design
|
|||||||
│ │ │
|
│ │ │
|
||||||
│ [x] Standardize logging │ [x] Geographic validation │
|
│ [x] Standardize logging │ [x] Geographic validation │
|
||||||
│ [x] Config validation │ [x] Additional scrapers │
|
│ [x] Config validation │ [x] Additional scrapers │
|
||||||
│ [ ] Export functionality │ [ ] API sources │
|
│ [x] Export functionality │ [ ] API sources │
|
||||||
│ [x] Status output │ [ ] Protocol fingerprinting │
|
│ [x] Status output │ [ ] Protocol fingerprinting │
|
||||||
│ │ │
|
│ │ │
|
||||||
└──────────────────────────┴──────────────────────────────────────────────────┘
|
└──────────────────────────┴──────────────────────────────────────────────────┘
|
||||||
@@ -281,6 +281,22 @@ PPF (Proxy Fetcher) is a Python 2 proxy scraping and validation framework design
|
|||||||
- [x] TLS handshake validation with certificate verification
|
- [x] TLS handshake validation with certificate verification
|
||||||
- [x] Detects MITM proxies that intercept SSL connections
|
- [x] Detects MITM proxies that intercept SSL connections
|
||||||
|
|
||||||
|
### Export Functionality (Done)
|
||||||
|
- [x] export.py CLI tool for exporting working proxies
|
||||||
|
- [x] Multiple formats: txt, json, csv, len (length-prefixed)
|
||||||
|
- [x] Filters: proto, country, anonymity, max_latency
|
||||||
|
- [x] Sort options: latency, added, tested, success
|
||||||
|
- [x] Output to stdout or file
|
||||||
|
|
||||||
|
### Web Dashboard (Done)
|
||||||
|
- [x] /dashboard endpoint with dark theme HTML UI
|
||||||
|
- [x] /api/stats endpoint for JSON runtime statistics
|
||||||
|
- [x] Auto-refresh with JavaScript fetch every 5 seconds
|
||||||
|
- [x] Stats provider callback from proxywatchd.py to httpd.py
|
||||||
|
- [x] Displays: tested/passed/success rate, thread count, uptime
|
||||||
|
- [x] Tor pool health: per-host latency, success rate, availability
|
||||||
|
- [x] Failure categories breakdown: timeout, proxy, ssl, closed
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Technical Debt
|
## Technical Debt
|
||||||
@@ -311,3 +327,4 @@ PPF (Proxy Fetcher) is a Python 2 proxy scraping and validation framework design
|
|||||||
| comboparse.py | Config/arg parser framework | Stable, cleaned |
|
| comboparse.py | Config/arg parser framework | Stable, cleaned |
|
||||||
| soup_parser.py | BeautifulSoup wrapper | Stable, cleaned |
|
| soup_parser.py | BeautifulSoup wrapper | Stable, cleaned |
|
||||||
| misc.py | Utilities (timestamp, logging) | Stable, cleaned |
|
| misc.py | Utilities (timestamp, logging) | Stable, cleaned |
|
||||||
|
| export.py | Proxy export CLI tool | Active |
|
||||||
|
|||||||
49
TODO.md
49
TODO.md
@@ -133,37 +133,14 @@ and report() methods. Integrated into main loop with configurable stats_interval
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### [ ] 14. Export Functionality
|
### [x] 14. Export Functionality
|
||||||
|
|
||||||
**Problem:** No easy way to export working proxies for use elsewhere.
|
**Completed.** Added export.py CLI tool for exporting working proxies.
|
||||||
|
- Formats: txt (default), json, csv, len (length-prefixed)
|
||||||
**Implementation:**
|
- Filters: --proto, --country, --anonymity, --max-latency
|
||||||
```python
|
- Options: --sort (latency, added, tested, success), --limit, --pretty
|
||||||
# new file: export.py
|
- Output: stdout or --output file
|
||||||
def export_proxies(proxydb, format='txt', filters=None):
|
- Usage: `python export.py --proto http --country US --sort latency --limit 100`
|
||||||
"""Export working proxies to various formats."""
|
|
||||||
|
|
||||||
query = 'SELECT proto, proxy FROM proxylist WHERE failed=0'
|
|
||||||
if filters:
|
|
||||||
if 'proto' in filters:
|
|
||||||
query += ' AND proto=?'
|
|
||||||
|
|
||||||
rows = proxydb.execute(query).fetchall()
|
|
||||||
|
|
||||||
if format == 'txt':
|
|
||||||
return '\n'.join('%s://%s' % (r[0], r[1]) for r in rows)
|
|
||||||
elif format == 'json':
|
|
||||||
import json
|
|
||||||
return json.dumps([{'proto': r[0], 'address': r[1]} for r in rows])
|
|
||||||
elif format == 'csv':
|
|
||||||
return 'proto,address\n' + '\n'.join('%s,%s' % r for r in rows)
|
|
||||||
|
|
||||||
# CLI: python export.py --format json --proto socks5 > proxies.json
|
|
||||||
```
|
|
||||||
|
|
||||||
**Files:** new export.py
|
|
||||||
**Effort:** Low
|
|
||||||
**Risk:** Low
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -251,8 +228,16 @@ if __name__ == '__main__':
|
|||||||
- Integrated into proxywatchd.py (starts when httpd.enabled=True)
|
- Integrated into proxywatchd.py (starts when httpd.enabled=True)
|
||||||
- Config: [httpd] section with listenip, port, enabled
|
- Config: [httpd] section with listenip, port, enabled
|
||||||
|
|
||||||
### [ ] 20. Web Dashboard
|
### [x] 20. Web Dashboard
|
||||||
Status page showing live statistics.
|
|
||||||
|
**Completed.** Added web dashboard with live statistics.
|
||||||
|
- httpd.py: DASHBOARD_HTML template with dark theme UI
|
||||||
|
- Endpoint: /dashboard (HTML page with auto-refresh)
|
||||||
|
- Endpoint: /api/stats (JSON runtime statistics)
|
||||||
|
- Stats include: tested/passed counts, success rate, thread count, uptime
|
||||||
|
- Tor pool health: per-host latency, success rate, availability
|
||||||
|
- Failure categories: timeout, proxy, ssl, closed, etc.
|
||||||
|
- proxywatchd.py: get_runtime_stats() method provides stats callback
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ tor_hosts = 127.0.0.1:9050
|
|||||||
[watchd]
|
[watchd]
|
||||||
max_fail = 5
|
max_fail = 5
|
||||||
threads = 10
|
threads = 10
|
||||||
|
min_threads = 5
|
||||||
timeout = 9
|
timeout = 9
|
||||||
submit_after = 200
|
submit_after = 200
|
||||||
use_ssl = 0
|
use_ssl = 0
|
||||||
@@ -26,8 +27,7 @@ threads = 3
|
|||||||
tor_safeguard = 0
|
tor_safeguard = 0
|
||||||
|
|
||||||
[scraper]
|
[scraper]
|
||||||
|
enabled = 1
|
||||||
[flood]
|
|
||||||
|
|
||||||
[httpd]
|
[httpd]
|
||||||
listenip = 127.0.0.1
|
listenip = 127.0.0.1
|
||||||
|
|||||||
8
http2.py
8
http2.py
@@ -159,7 +159,7 @@ class RsHttp():
|
|||||||
if postdata != '':
|
if postdata != '':
|
||||||
s += postdata
|
s += postdata
|
||||||
if self.debugreq:
|
if self.debugreq:
|
||||||
print ">>>\n", s
|
print(">>>\n", s)
|
||||||
return s
|
return s
|
||||||
|
|
||||||
def _make_head_request(self, url, extras=None):
|
def _make_head_request(self, url, extras=None):
|
||||||
@@ -268,7 +268,7 @@ class RsHttp():
|
|||||||
res = res.decode(charset)
|
res = res.decode(charset)
|
||||||
|
|
||||||
if self.debugreq:
|
if self.debugreq:
|
||||||
print "<<<\n", s, res
|
print("<<<\n", s, res)
|
||||||
|
|
||||||
return (s, res, redirect)
|
return (s, res, redirect)
|
||||||
|
|
||||||
@@ -377,7 +377,7 @@ class RsHttp():
|
|||||||
l = self.conn.recvline().strip()
|
l = self.conn.recvline().strip()
|
||||||
s += l + '\n'
|
s += l + '\n'
|
||||||
if l == '': break
|
if l == '': break
|
||||||
if self.debugreq: print "<<<\n", s
|
if self.debugreq: print("<<<\n", s)
|
||||||
return s
|
return s
|
||||||
|
|
||||||
def head(self, url, extras=None):
|
def head(self, url, extras=None):
|
||||||
@@ -433,7 +433,7 @@ if __name__ == '__main__':
|
|||||||
http = RsHttp(host=host, port=port, timeout=15, ssl=use_ssl, follow_redirects=True, auto_set_cookies=True)
|
http = RsHttp(host=host, port=port, timeout=15, ssl=use_ssl, follow_redirects=True, auto_set_cookies=True)
|
||||||
http.debugreq = True
|
http.debugreq = True
|
||||||
if not http.connect():
|
if not http.connect():
|
||||||
print "sorry, couldn't connect"
|
print("sorry, couldn't connect")
|
||||||
else:
|
else:
|
||||||
hdr = http.head(uri)
|
hdr = http.head(uri)
|
||||||
hdr, res = http.get(uri)
|
hdr, res = http.get(uri)
|
||||||
|
|||||||
Reference in New Issue
Block a user