diff --git a/ROADMAP.md b/ROADMAP.md
index 6ec6df6..554f1e4 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -25,7 +25,7 @@
 - [x] Built-in control API (runtime metrics, pool state, config reload)
 - [ ] SOCKS5 server authentication (username/password)
 - [x] Tor control port integration (circuit renewal via NEWNYM)
-- [ ] Metrics (connections/sec, bytes relayed, hop latency)
+- [x] Metrics (connections/sec, bytes relayed, hop latency)
 
 ## v0.3.0
diff --git a/TASKS.md b/TASKS.md
index 56bcb63..2e1f767 100644
--- a/TASKS.md
+++ b/TASKS.md
@@ -48,6 +48,7 @@
 - [x] Replace HTTP health check with TLS handshake (round-robin targets, no httpbin dependency)
 - [x] Multi-listener with configurable proxy chaining (per-port chain depth)
+- [x] Connection rate and chain latency metrics (rate/s, p50/p95/p99)
 
 ## Next
 - [ ] Integration tests with mock proxy server
diff --git a/docs/CHEATSHEET.md b/docs/CHEATSHEET.md
index 727226b..aaf0f81 100644
--- a/docs/CHEATSHEET.md
+++ b/docs/CHEATSHEET.md
@@ -164,7 +164,22 @@ python -m pstats ~/.cache/s5p/s5p.prof # container profile output
 
 ## Metrics Log
 
 ```
-metrics: conn=142 ok=98 fail=44 retries=88 active=3 in=1.2M out=4.5M up=0h05m12s pool=42/65
+metrics: conn=1842 ok=1790 fail=52 retries=67 active=3 in=50.0M out=1.0G rate=4.72/s p50=198.3ms p95=890.1ms up=1h01m01s pool=42/65
+```
+
+## Metrics JSON (`/metrics`)
+
+```bash
+curl -s http://127.0.0.1:1081/metrics | jq .
+```
+
+```json
+{
+  "connections": 1842,
+  "success": 1790,
+  "rate": 4.72,
+  "latency": {"count": 1000, "min": 45.2, "max": 2841.7, "avg": 312.4, "p50": 198.3, "p95": 890.1, "p99": 1523.6}
+}
 ```
 
 ## Troubleshooting
diff --git a/docs/USAGE.md b/docs/USAGE.md
index 77c7dea..5714c6d 100644
--- a/docs/USAGE.md
+++ b/docs/USAGE.md
@@ -307,7 +307,7 @@ s5p --api 127.0.0.1:1081 -c config/s5p.yaml
 | Method | Path | Description |
 |--------|------|-------------|
 | `GET` | `/status` | Combined summary: uptime, metrics, pool stats, chain |
-| `GET` | `/metrics` | Full metrics counters (connections, bytes, uptime) |
+| `GET` | `/metrics` | Full metrics counters (connections, bytes, rate, latency) |
 | `GET` | `/pool` | All proxies with per-entry state |
 | `GET` | `/pool/alive` | Alive proxies only |
 | `GET` | `/config` | Current runtime config (sanitized) |
@@ -461,7 +461,7 @@ s5p tracks connection metrics and logs a summary every 60 seconds and on
 shutdown:
 
 ```
-metrics: conn=142 ok=98 fail=44 retries=88 active=3 in=1.2M out=4.5M up=0h05m12s pool=42/65
+metrics: conn=1842 ok=1790 fail=52 retries=67 active=3 in=50.0M out=1.0G rate=4.72/s p50=198.3ms p95=890.1ms up=1h01m01s pool=42/65
 ```
 
 | Counter | Meaning |
@@ -473,9 +473,48 @@ metrics: conn=142 ok=98 fail=44 retries=88 active=3 in=1.2M out=4.5M up=0h05m12s
 | `active` | Currently relaying |
 | `in` | Bytes client -> remote |
 | `out` | Bytes remote -> client |
+| `rate` | Connection rate (events/sec, rolling window) |
+| `p50` | Median chain setup latency in ms |
+| `p95` | 95th percentile chain setup latency in ms |
 | `up` | Server uptime |
 | `pool` | Alive/total proxies (only when pool is active) |
 
+### `/metrics` JSON response
+
+`GET /metrics` returns all counters plus rate and latency percentiles:
+
+```json
+{
+  "connections": 1842,
+  "success": 1790,
+  "failed": 52,
+  "retries": 67,
+  "active": 3,
+  "bytes_in": 52428800,
+  "bytes_out": 1073741824,
+  "uptime": 3661.2,
+  "rate": 4.72,
+  "latency": {
+    "count": 1000,
+    "min": 45.2,
+    "max": 2841.7,
+    "avg": 312.4,
+    "p50": 198.3,
+    "p95": 890.1,
+    "p99": 1523.6
+  }
+}
+```
+
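+Scripted consumers can poll the same endpoint. A minimal sketch using only
+the Python standard library (the address and the 5-second interval are
+illustrative, not defaults):
+
+```python
+import json
+import time
+import urllib.request
+
+URL = "http://127.0.0.1:1081/metrics"  # match your --api address
+
+while True:
+    with urllib.request.urlopen(URL) as resp:
+        m = json.load(resp)
+    lat = m["latency"]  # JSON null (None) until the first chain completes
+    p95 = f"{lat['p95']:.1f}ms" if lat else "n/a"
+    print(f"rate={m['rate']:.2f}/s p95={p95}")
+    time.sleep(5)
+```
+
+Field reference:
+
+| Field | Type | Description |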
+|-------|------|-------------| +| `rate` | float | Connections/sec (rolling window of last 256 events) | +| `latency` | object/null | Chain setup latency stats in ms (null if no samples) | +| `latency.count` | int | Number of samples in buffer (max 1000) | +| `latency.p50` | float | Median latency (ms) | +| `latency.p95` | float | 95th percentile (ms) | +| `latency.p99` | float | 99th percentile (ms) | + ## Profiling ```bash diff --git a/src/s5p/api.py b/src/s5p/api.py index c86db3c..2fd1158 100644 --- a/src/s5p/api.py +++ b/src/s5p/api.py @@ -63,6 +63,8 @@ def _handle_status(ctx: dict) -> tuple[int, dict]: "active": metrics.active, "bytes_in": metrics.bytes_in, "bytes_out": metrics.bytes_out, + "rate": round(metrics.conn_rate.rate(), 2), + "latency": metrics.latency.stats(), } pool = ctx.get("pool") if pool: diff --git a/src/s5p/metrics.py b/src/s5p/metrics.py index 01ea7c3..8ea5225 100644 --- a/src/s5p/metrics.py +++ b/src/s5p/metrics.py @@ -3,6 +3,69 @@ from __future__ import annotations import time +from collections import deque + + +class RateTracker: + """Rolling window event rate (events/sec). + + Stores up to ``maxlen`` monotonic timestamps. Rate is computed + on read as ``(n - 1) / span`` over the window -- no background timer. + """ + + def __init__(self, maxlen: int = 256) -> None: + self._times: deque[float] = deque(maxlen=maxlen) + + def record(self, now: float | None = None) -> None: + """Record an event at *now* (default: ``time.monotonic()``).""" + self._times.append(now if now is not None else time.monotonic()) + + def rate(self) -> float: + """Return events/sec over the window, 0.0 if < 2 events.""" + n = len(self._times) + if n < 2: + return 0.0 + span = self._times[-1] - self._times[0] + if span <= 0: + return 0.0 + return (n - 1) / span + + +class LatencyTracker: + """Circular buffer of latency samples with percentile stats. + + Stores up to ``maxlen`` float-second samples. ``stats()`` sorts + a copy on read (~0.1 ms for 1000 floats) and returns millisecond values. 
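+
+    A doctest-style sketch of the seconds-in, milliseconds-out contract
+    (one 100 ms sample, so min, max, avg and p50 all collapse to it)::
+
+        >>> lt = LatencyTracker()
+        >>> lt.record(0.1)
+        >>> lt.stats()["p50"]
+        100.0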
+ """ + + def __init__(self, maxlen: int = 1000) -> None: + self._samples: deque[float] = deque(maxlen=maxlen) + + def record(self, seconds: float) -> None: + """Append a latency sample (in seconds).""" + self._samples.append(seconds) + + @property + def count(self) -> int: + """Number of samples currently stored.""" + return len(self._samples) + + def stats(self) -> dict | None: + """Return ``{count, min, max, avg, p50, p95, p99}`` in ms, or None.""" + n = len(self._samples) + if n == 0: + return None + s = sorted(self._samples) + ms = [v * 1000 for v in s] + return { + "count": n, + "min": round(ms[0], 1), + "max": round(ms[-1], 1), + "avg": round(sum(ms) / n, 1), + "p50": round(ms[int(n * 0.50)], 1), + "p95": round(ms[min(int(n * 0.95), n - 1)], 1), + "p99": round(ms[min(int(n * 0.99), n - 1)], 1), + } class Metrics: @@ -21,16 +84,23 @@ class Metrics: self.bytes_out: int = 0 self.active: int = 0 self.started: float = time.monotonic() + self.conn_rate: RateTracker = RateTracker() + self.latency: LatencyTracker = LatencyTracker() def summary(self) -> str: """One-line log-friendly summary.""" uptime = time.monotonic() - self.started h, rem = divmod(int(uptime), 3600) m, s = divmod(rem, 60) + rate = self.conn_rate.rate() + lat = self.latency.stats() + p50 = f" p50={lat['p50']:.1f}ms" if lat else "" + p95 = f" p95={lat['p95']:.1f}ms" if lat else "" return ( f"conn={self.connections} ok={self.success} fail={self.failed} " f"retries={self.retries} active={self.active} " f"in={_human_bytes(self.bytes_in)} out={_human_bytes(self.bytes_out)} " + f"rate={rate:.2f}/s{p50}{p95} " f"up={h}h{m:02d}m{s:02d}s" ) @@ -45,6 +115,8 @@ class Metrics: "bytes_in": self.bytes_in, "bytes_out": self.bytes_out, "uptime": round(time.monotonic() - self.started, 1), + "rate": round(self.conn_rate.rate(), 2), + "latency": self.latency.stats(), } diff --git a/src/s5p/server.py b/src/s5p/server.py index a36fd9a..98a6430 100644 --- a/src/s5p/server.py +++ b/src/s5p/server.py @@ -73,6 +73,7 @@ async def _handle_client( if metrics: metrics.connections += 1 + metrics.conn_rate.record() try: # -- greeting -- @@ -126,6 +127,8 @@ async def _handle_client( ) dt = time.monotonic() - t0 logger.debug("[%s] chain up in %.0fms", tag, dt * 1000) + if metrics: + metrics.latency.record(dt) break except (ProtoError, TimeoutError, ConnectionError, OSError) as e: last_err = e diff --git a/tests/test_api.py b/tests/test_api.py index 6f92e93..e86b267 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -109,6 +109,8 @@ class TestHandleStatus: assert body["connections"] == 10 assert body["success"] == 8 assert "uptime" in body + assert "rate" in body + assert "latency" in body def test_with_pool(self): pool = MagicMock() @@ -152,6 +154,8 @@ class TestHandleMetrics: assert body["connections"] == 42 assert body["bytes_in"] == 1024 assert "uptime" in body + assert "rate" in body + assert "latency" in body class TestHandlePool: diff --git a/tests/test_metrics.py b/tests/test_metrics.py new file mode 100644 index 0000000..d24491c --- /dev/null +++ b/tests/test_metrics.py @@ -0,0 +1,164 @@ +"""Tests for metrics trackers and helpers.""" + +from s5p.metrics import LatencyTracker, Metrics, RateTracker, _human_bytes + +# -- LatencyTracker ---------------------------------------------------------- + + +class TestLatencyTracker: + """Test latency sample collection and percentile stats.""" + + def test_empty(self): + lt = LatencyTracker() + assert lt.count == 0 + assert lt.stats() is None + + def test_single(self): + lt = LatencyTracker() + 
+        lt.record(0.1)
+        s = lt.stats()
+        assert s is not None
+        assert s["count"] == 1
+        assert s["min"] == s["max"] == s["avg"] == s["p50"]
+
+    def test_percentiles(self):
+        lt = LatencyTracker()
+        # 100 evenly spaced samples: 0.001, 0.002, ..., 0.100
+        for i in range(1, 101):
+            lt.record(i / 1000)
+        s = lt.stats()
+        assert s["count"] == 100
+        assert s["min"] == 1.0  # 0.001s = 1.0ms
+        assert s["max"] == 100.0  # 0.100s = 100.0ms
+        assert 49.0 <= s["avg"] <= 52.0
+        assert 50.0 <= s["p50"] <= 52.0
+        assert 95.0 <= s["p95"] <= 97.0
+        assert 99.0 <= s["p99"] <= 101.0
+
+    def test_bounded(self):
+        lt = LatencyTracker(maxlen=5)
+        for i in range(10):
+            lt.record(i / 100)
+        assert lt.count == 5
+        s = lt.stats()
+        # only the last 5 samples remain: 0.05..0.09
+        assert s["min"] == 50.0
+        assert s["max"] == 90.0
+
+    def test_milliseconds(self):
+        lt = LatencyTracker()
+        lt.record(0.5)  # 500ms
+        s = lt.stats()
+        assert s["min"] == 500.0
+        assert s["max"] == 500.0
+
+
+# -- RateTracker -------------------------------------------------------------
+
+
+class TestRateTracker:
+    """Test rolling window event rate calculation."""
+
+    def test_empty(self):
+        rt = RateTracker()
+        assert rt.rate() == 0.0
+
+    def test_single(self):
+        rt = RateTracker()
+        rt.record()
+        assert rt.rate() == 0.0
+
+    def test_known_rate(self):
+        rt = RateTracker()
+        # 11 events at 0.1s intervals = 10/1.0 = 10.0/s
+        for i in range(11):
+            rt.record(now=100.0 + i * 0.1)
+        assert abs(rt.rate() - 10.0) < 0.01
+
+    def test_bounded(self):
+        rt = RateTracker(maxlen=5)
+        for i in range(10):
+            rt.record(now=float(i))
+        # only last 5 events: t=5..9, span=4, rate=4/4=1.0
+        assert abs(rt.rate() - 1.0) < 0.01
+
+    def test_zero_span(self):
+        rt = RateTracker()
+        rt.record(now=1.0)
+        rt.record(now=1.0)
+        assert rt.rate() == 0.0
+
+
+# -- Metrics -----------------------------------------------------------------
+
+
+class TestMetrics:
+    """Test Metrics aggregation and output."""
+
+    def test_to_dict_includes_rate_and_latency(self):
+        m = Metrics()
+        m.connections = 10
+        m.conn_rate.record(now=0.0)
+        m.conn_rate.record(now=1.0)
+        m.latency.record(0.2)
+        m.latency.record(0.3)
+        d = m.to_dict()
+        assert "rate" in d
+        assert isinstance(d["rate"], float)
+        assert "latency" in d
+        assert d["latency"] is not None
+        assert d["latency"]["count"] == 2
+
+    def test_to_dict_latency_none_when_empty(self):
+        m = Metrics()
+        d = m.to_dict()
+        assert d["latency"] is None
+        assert d["rate"] == 0.0
+
+    def test_summary_includes_rate(self):
+        m = Metrics()
+        m.conn_rate.record(now=0.0)
+        m.conn_rate.record(now=1.0)
+        s = m.summary()
+        assert "rate=" in s
+        assert "/s" in s
+
+    def test_summary_includes_latency(self):
+        m = Metrics()
+        m.latency.record(0.2)
+        s = m.summary()
+        assert "p50=" in s
+        assert "p95=" in s
+
+    def test_summary_no_latency_when_empty(self):
+        m = Metrics()
+        s = m.summary()
+        assert "p50=" not in s
+        assert "p95=" not in s
+
+
+# -- _human_bytes ------------------------------------------------------------
+
+
+class TestHumanBytes:
+    """Test byte count formatting."""
+
+    def test_bytes(self):
+        assert _human_bytes(0) == "0B"
+        assert _human_bytes(512) == "512B"
+
+    def test_kilobytes(self):
+        assert _human_bytes(1024) == "1.0K"
+        assert _human_bytes(1536) == "1.5K"
+
+    def test_megabytes(self):
+        assert _human_bytes(1024 * 1024) == "1.0M"
+
+    def test_gigabytes(self):
+        assert _human_bytes(1024**3) == "1.0G"
+
+    def test_terabytes(self):
+        assert _human_bytes(1024**4) == "1.0T"
+
+    def test_petabytes(self):
+        assert _human_bytes(1024**5) == "1.0P"
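
Taken together, the trackers hang off `Metrics` the same way `server.py` wires
them per connection. A standalone sketch of the flow (illustrative values; the
printed line matches the summary format documented above):

```python
from s5p.metrics import Metrics

m = Metrics()
m.connections += 1       # server.py increments this on accept...
m.conn_rate.record()     # ...and records the event for the rolling rate
m.latency.record(0.198)  # recorded once the chain is up (dt in seconds)
m.success += 1
print(m.summary())
# conn=1 ok=1 fail=0 retries=0 active=0 in=0B out=0B rate=0.00/s p50=198.0ms p95=198.0ms up=0h00m00s
```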