feat: per-listener latency tracking
Each listener now tracks chain setup latency independently via a dict[str, LatencyTracker] on Metrics. The global aggregate stays for summary output. /status embeds per-listener latency on each listener entry; /metrics includes a listener_latency map keyed by host:port.
This commit is contained in:
@@ -178,10 +178,17 @@ curl -s http://127.0.0.1:1081/metrics | jq .
|
|||||||
"connections": 1842,
|
"connections": 1842,
|
||||||
"success": 1790,
|
"success": 1790,
|
||||||
"rate": 4.72,
|
"rate": 4.72,
|
||||||
"latency": {"count": 1000, "min": 45.2, "max": 2841.7, "avg": 312.4, "p50": 198.3, "p95": 890.1, "p99": 1523.6}
|
"latency": {"count": 1000, "min": 45.2, "max": 2841.7, "avg": 312.4, "p50": 198.3, "p95": 890.1, "p99": 1523.6},
|
||||||
|
"listener_latency": {
|
||||||
|
"0.0.0.0:1080": {"count": 500, "p50": 1800.2, "p95": 8200.1, "...": "..."},
|
||||||
|
"0.0.0.0:1081": {"count": 300, "p50": 1000.1, "p95": 3500.2, "...": "..."},
|
||||||
|
"0.0.0.0:1082": {"count": 200, "p50": 400.1, "p95": 1200.5, "...": "..."}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Per-listener latency also appears in `/status` under each listener entry.
|
||||||
|
|
||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
||||||
| Symptom | Check |
|
| Symptom | Check |
|
||||||
|
|||||||
@@ -481,7 +481,8 @@ metrics: conn=1842 ok=1790 fail=52 retries=67 active=3 in=50.0M out=1.0G rate=4.
|
|||||||
|
|
||||||
### `/metrics` JSON response
|
### `/metrics` JSON response
|
||||||
|
|
||||||
`GET /metrics` returns all counters plus rate and latency percentiles:
|
`GET /metrics` returns all counters plus rate, latency percentiles, and
|
||||||
|
per-listener latency breakdowns:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
@@ -502,6 +503,11 @@ metrics: conn=1842 ok=1790 fail=52 retries=67 active=3 in=50.0M out=1.0G rate=4.
|
|||||||
"p50": 198.3,
|
"p50": 198.3,
|
||||||
"p95": 890.1,
|
"p95": 890.1,
|
||||||
"p99": 1523.6
|
"p99": 1523.6
|
||||||
|
},
|
||||||
|
"listener_latency": {
|
||||||
|
"0.0.0.0:1080": {"count": 500, "min": 800.1, "max": 12400.3, "avg": 2100.5, "p50": 1800.2, "p95": 8200.1, "p99": 10500.3},
|
||||||
|
"0.0.0.0:1081": {"count": 300, "min": 400.5, "max": 5200.1, "avg": 1200.3, "p50": 1000.1, "p95": 3500.2, "p99": 4800.7},
|
||||||
|
"0.0.0.0:1082": {"count": 200, "min": 150.2, "max": 2000.1, "avg": 500.3, "p50": 400.1, "p95": 1200.5, "p99": 1800.2}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
@@ -509,11 +515,34 @@ metrics: conn=1842 ok=1790 fail=52 retries=67 active=3 in=50.0M out=1.0G rate=4.
|
|||||||
| Field | Type | Description |
|
| Field | Type | Description |
|
||||||
|-------|------|-------------|
|
|-------|------|-------------|
|
||||||
| `rate` | float | Connections/sec (rolling window of last 256 events) |
|
| `rate` | float | Connections/sec (rolling window of last 256 events) |
|
||||||
| `latency` | object/null | Chain setup latency stats in ms (null if no samples) |
|
| `latency` | object/null | Aggregate chain setup latency in ms (null if no samples) |
|
||||||
| `latency.count` | int | Number of samples in buffer (max 1000) |
|
| `latency.count` | int | Number of samples in buffer (max 1000) |
|
||||||
| `latency.p50` | float | Median latency (ms) |
|
| `latency.p50` | float | Median latency (ms) |
|
||||||
| `latency.p95` | float | 95th percentile (ms) |
|
| `latency.p95` | float | 95th percentile (ms) |
|
||||||
| `latency.p99` | float | 99th percentile (ms) |
|
| `latency.p99` | float | 99th percentile (ms) |
|
||||||
|
| `listener_latency` | object | Per-listener latency, keyed by `host:port` |
|
||||||
|
|
||||||
|
### Per-listener latency
|
||||||
|
|
||||||
|
Each listener tracks chain setup latency independently. The `/status`
|
||||||
|
endpoint includes a `latency` field on each listener entry:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"listeners": [
|
||||||
|
{
|
||||||
|
"listen": "0.0.0.0:1080",
|
||||||
|
"chain": ["socks5://10.200.1.13:9050"],
|
||||||
|
"pool_hops": 2,
|
||||||
|
"latency": {"count": 500, "p50": 1800.2, "p95": 8200.1, "...": "..."}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The aggregate `latency` in `/metrics` combines all listeners. Use
|
||||||
|
`listener_latency` or the per-listener `latency` in `/status` to
|
||||||
|
isolate latency by chain depth.
|
||||||
|
|
||||||
## Profiling
|
## Profiling
|
||||||
|
|
||||||
|
|||||||
@@ -76,6 +76,9 @@ def _handle_status(ctx: dict) -> tuple[int, dict]:
|
|||||||
"listen": f"{lc.listen_host}:{lc.listen_port}",
|
"listen": f"{lc.listen_host}:{lc.listen_port}",
|
||||||
"chain": [str(h) for h in lc.chain],
|
"chain": [str(h) for h in lc.chain],
|
||||||
"pool_hops": lc.pool_hops,
|
"pool_hops": lc.pool_hops,
|
||||||
|
"latency": metrics.get_listener_latency(
|
||||||
|
f"{lc.listen_host}:{lc.listen_port}"
|
||||||
|
).stats(),
|
||||||
}
|
}
|
||||||
for lc in config.listeners
|
for lc in config.listeners
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -86,6 +86,13 @@ class Metrics:
|
|||||||
self.started: float = time.monotonic()
|
self.started: float = time.monotonic()
|
||||||
self.conn_rate: RateTracker = RateTracker()
|
self.conn_rate: RateTracker = RateTracker()
|
||||||
self.latency: LatencyTracker = LatencyTracker()
|
self.latency: LatencyTracker = LatencyTracker()
|
||||||
|
self.listener_latency: dict[str, LatencyTracker] = {}
|
||||||
|
|
||||||
|
def get_listener_latency(self, key: str) -> LatencyTracker:
|
||||||
|
"""Get or create a per-listener latency tracker."""
|
||||||
|
if key not in self.listener_latency:
|
||||||
|
self.listener_latency[key] = LatencyTracker()
|
||||||
|
return self.listener_latency[key]
|
||||||
|
|
||||||
def summary(self) -> str:
|
def summary(self) -> str:
|
||||||
"""One-line log-friendly summary."""
|
"""One-line log-friendly summary."""
|
||||||
@@ -117,6 +124,9 @@ class Metrics:
|
|||||||
"uptime": round(time.monotonic() - self.started, 1),
|
"uptime": round(time.monotonic() - self.started, 1),
|
||||||
"rate": round(self.conn_rate.rate(), 2),
|
"rate": round(self.conn_rate.rate(), 2),
|
||||||
"latency": self.latency.stats(),
|
"latency": self.latency.stats(),
|
||||||
|
"listener_latency": {
|
||||||
|
k: v.stats() for k, v in self.listener_latency.items()
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -129,6 +129,9 @@ async def _handle_client(
|
|||||||
logger.debug("[%s] chain up in %.0fms", tag, dt * 1000)
|
logger.debug("[%s] chain up in %.0fms", tag, dt * 1000)
|
||||||
if metrics:
|
if metrics:
|
||||||
metrics.latency.record(dt)
|
metrics.latency.record(dt)
|
||||||
|
metrics.get_listener_latency(
|
||||||
|
f"{listener.listen_host}:{listener.listen_port}"
|
||||||
|
).record(dt)
|
||||||
break
|
break
|
||||||
except (ProtoError, TimeoutError, ConnectionError, OSError) as e:
|
except (ProtoError, TimeoutError, ConnectionError, OSError) as e:
|
||||||
last_err = e
|
last_err = e
|
||||||
|
|||||||
@@ -135,11 +135,18 @@ class TestHandleStatus:
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
ctx = _make_ctx(config=config)
|
ctx = _make_ctx(config=config)
|
||||||
|
# record some latency for the first listener
|
||||||
|
ctx["metrics"].get_listener_latency("0.0.0.0:1080").record(0.2)
|
||||||
_, body = _handle_status(ctx)
|
_, body = _handle_status(ctx)
|
||||||
assert len(body["listeners"]) == 2
|
assert len(body["listeners"]) == 2
|
||||||
assert body["listeners"][0]["chain"] == ["socks5://127.0.0.1:9050"]
|
assert body["listeners"][0]["chain"] == ["socks5://127.0.0.1:9050"]
|
||||||
assert body["listeners"][0]["pool_hops"] == 0
|
assert body["listeners"][0]["pool_hops"] == 0
|
||||||
assert body["listeners"][1]["pool_hops"] == 1
|
assert body["listeners"][1]["pool_hops"] == 1
|
||||||
|
# per-listener latency present on each entry
|
||||||
|
assert "latency" in body["listeners"][0]
|
||||||
|
assert body["listeners"][0]["latency"]["count"] == 1
|
||||||
|
assert "latency" in body["listeners"][1]
|
||||||
|
assert body["listeners"][1]["latency"] is None
|
||||||
|
|
||||||
|
|
||||||
class TestHandleMetrics:
|
class TestHandleMetrics:
|
||||||
@@ -156,6 +163,7 @@ class TestHandleMetrics:
|
|||||||
assert "uptime" in body
|
assert "uptime" in body
|
||||||
assert "rate" in body
|
assert "rate" in body
|
||||||
assert "latency" in body
|
assert "latency" in body
|
||||||
|
assert "listener_latency" in body
|
||||||
|
|
||||||
|
|
||||||
class TestHandlePool:
|
class TestHandlePool:
|
||||||
|
|||||||
@@ -136,6 +136,23 @@ class TestMetrics:
|
|||||||
assert "p50=" not in s
|
assert "p50=" not in s
|
||||||
assert "p95=" not in s
|
assert "p95=" not in s
|
||||||
|
|
||||||
|
def test_listener_latency(self):
|
||||||
|
m = Metrics()
|
||||||
|
m.get_listener_latency("0.0.0.0:1080").record(0.5)
|
||||||
|
m.get_listener_latency("0.0.0.0:1080").record(0.6)
|
||||||
|
m.get_listener_latency("0.0.0.0:1081").record(0.1)
|
||||||
|
d = m.to_dict()
|
||||||
|
assert "listener_latency" in d
|
||||||
|
assert "0.0.0.0:1080" in d["listener_latency"]
|
||||||
|
assert "0.0.0.0:1081" in d["listener_latency"]
|
||||||
|
assert d["listener_latency"]["0.0.0.0:1080"]["count"] == 2
|
||||||
|
assert d["listener_latency"]["0.0.0.0:1081"]["count"] == 1
|
||||||
|
|
||||||
|
def test_listener_latency_empty(self):
|
||||||
|
m = Metrics()
|
||||||
|
d = m.to_dict()
|
||||||
|
assert d["listener_latency"] == {}
|
||||||
|
|
||||||
|
|
||||||
# -- _human_bytes ------------------------------------------------------------
|
# -- _human_bytes ------------------------------------------------------------
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user