feat: Implement latency profiling and guardrails for performance monitoring
CI / lint-test-build (push) Failing after 19s
CI / lint-test-build (push) Failing after 19s
- Added synthetic latency profiler scenarios and CLI scripts for baseline generation and regression checks.
- Introduced latency baseline and threshold artifacts for CI enforcement.
- Enhanced CI workflow with latency guardrail checks.
- Updated documentation to include latency profiling commands and performance metrics.
- Added unit tests for latency guardrail evaluation.
This commit is contained in:
@@ -0,0 +1,176 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from pathlib import Path
|
||||
from statistics import fmean
|
||||
from tempfile import gettempdir
|
||||
from time import perf_counter
|
||||
|
||||
from arbitrade.config.settings import Settings
|
||||
from arbitrade.metrics import MetricsCalculator
|
||||
from arbitrade.storage.db import DuckDBStore
|
||||
|
||||
|
||||
def _python_scan_compute(store: DuckDBStore) -> tuple[float, float | None, float | None]:
    """Compute trade and opportunity metrics by scanning fetched rows in Python.

    Fetches every finished trade and every opportunity timestamp, then
    aggregates them in the interpreter rather than in SQL.

    Returns:
        A ``(realized, avg_duration, opm)`` tuple:

        * ``realized`` - sum of the non-null ``realized_pnl`` values.
        * ``avg_duration`` - mean of ``finished_at - started_at`` in seconds
          over rows where both values are datetimes, or ``None`` if there
          are no such rows.
        * ``opm`` - opportunities per minute over the span between the
          earliest and latest ``detected_at``; ``60.0`` for a single
          timestamp, the plain count when the span is zero, and ``None``
          when there are no timestamps.
    """
    with store.connect() as conn:
        finished_trades = conn.execute("""
            SELECT started_at, finished_at, realized_pnl
            FROM trades
            WHERE finished_at IS NOT NULL
        """).fetchall()
        detections = conn.execute("SELECT detected_at FROM opportunities").fetchall()

    # One pass over the trades collects both the PnL total and the durations.
    realized = 0
    durations = []
    for started_at, finished_at, pnl in finished_trades:
        if pnl is not None:
            realized += float(pnl)
        if isinstance(started_at, datetime) and isinstance(finished_at, datetime):
            durations.append((finished_at - started_at).total_seconds())

    avg_duration = None
    if durations:
        avg_duration = fmean(durations)

    stamps = [record[0] for record in detections if isinstance(record[0], datetime)]
    count = len(stamps)
    if count == 0:
        opm = None
    elif count == 1:
        opm = 60.0
    else:
        span_seconds = (max(stamps) - min(stamps)).total_seconds()
        if span_seconds > 0.0:
            opm = count / (span_seconds / 60.0)
        else:
            # All detections share one instant: fall back to the raw count.
            opm = float(count)

    return realized, avg_duration, opm
|
||||
|
||||
|
||||
def _seed_dataset(store: DuckDBStore) -> None:
    """Replace the benchmark tables with a fixed-size synthetic dataset.

    Empties ``trades``, ``opportunities`` and ``orders`` and then inserts
    2500 trades, 5000 opportunities and 3500 orders. All timestamps are
    offsets from the moment the function is called (UTC).
    """
    base = datetime.now(UTC)
    cycle = "USD->BTC->ETH->USD"

    def _trade(index: int) -> tuple[object, ...]:
        # Trades start one second apart and last 150-549 ms.
        started = base + timedelta(seconds=index)
        finished = started + timedelta(milliseconds=150 + (index % 400))
        pnl = ((index % 17) - 8) * 0.25
        return (f"t{index}", started, finished, "filled", pnl, pnl * 0.9, 100.0, cycle, 3)

    trade_rows: list[tuple[object, ...]] = [_trade(index) for index in range(2500)]

    # One opportunity every 200 ms; every odd-indexed one is marked executed.
    opportunity_rows: list[tuple[object, ...]] = [
        (base + timedelta(milliseconds=200 * index), cycle, 2.5, 1.2, 0.03, index % 2 == 1)
        for index in range(5000)
    ]

    # Order i references trade i modulo 2500, so the first 1000 trades get two orders.
    order_rows: list[tuple[object, ...]] = [
        (
            f"t{index % 2500}",
            f"o{index}",
            0,
            "BTC/USD",
            "buy",
            1.0,
            index,
            "closed",
            0.9,
            100.0,
            "{}",
            base,
        )
        for index in range(3500)
    ]

    insert_trades = """
            INSERT INTO trades (
                trade_ref,
                started_at,
                finished_at,
                status,
                realized_pnl,
                estimated_pnl,
                capital_used,
                cycle,
                leg_count
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            """
    insert_opportunities = """
            INSERT INTO opportunities (
                detected_at,
                cycle,
                gross_pct,
                net_pct,
                est_profit,
                executed
            ) VALUES (?, ?, ?, ?, ?, ?)
            """
    insert_orders = """
            INSERT INTO orders (
                trade_ref,
                order_ref,
                leg_index,
                pair,
                side,
                volume,
                user_ref,
                status,
                filled_volume,
                avg_price,
                raw_response,
                recorded_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """

    with store.connect() as conn:
        for table in ("trades", "opportunities", "orders"):
            conn.execute(f"DELETE FROM {table}")
        for statement, rows in (
            (insert_trades, trade_rows),
            (insert_opportunities, opportunity_rows),
            (insert_orders, order_rows),
        ):
            conn.executemany(statement, rows)
|
||||
|
||||
|
||||
def main() -> int:
    """Benchmark the Python row scan against ``MetricsCalculator.compute``.

    Migrates and seeds a DuckDB file in the system temp directory, runs
    both code paths a few times untimed, then times 20 runs of each and
    prints the per-run averages in milliseconds plus their ratio.

    Returns:
        Always ``0``.
    """
    bench_db = Path(gettempdir()) / "arbitrade_metrics_bench.duckdb"
    store = DuckDBStore(Settings(_env_file=None, DUCKDB_PATH=bench_db))
    store.migrate()
    _seed_dataset(store)

    calculator = MetricsCalculator(store)

    # Untimed passes so both paths have been exercised before measuring.
    warmups = 3
    for _ in range(warmups):
        _python_scan_compute(store)
        calculator.compute()

    runs = 20

    began = perf_counter()
    for _ in range(runs):
        _python_scan_compute(store)
    elapsed = perf_counter() - began
    python_ms = elapsed * 1000.0 / runs

    began = perf_counter()
    for _ in range(runs):
        calculator.compute()
    elapsed = perf_counter() - began
    sql_ms = elapsed * 1000.0 / runs

    # Guard the ratio against a zero denominator.
    if sql_ms > 0.0:
        speedup = python_ms / sql_ms
    else:
        speedup = 0.0

    print(
        f"python_scan_avg_ms={python_ms:.3f}",
        f"sql_aggregate_avg_ms={sql_ms:.3f}",
        f"speedup_x={speedup:.2f}",
        sep="\n",
    )
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
@@ -0,0 +1,50 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from arbitrade.perf.guardrails import evaluate_guardrails
|
||||
from arbitrade.perf.latency import run_latency_profile
|
||||
|
||||
|
||||
def _read_json(path: Path) -> dict[str, object]:
    """Load a JSON file whose top-level value must be an object.

    Args:
        path: File to read; its contents are decoded as UTF-8.

    Returns:
        The parsed object with every key converted to ``str``.

    Raises:
        ValueError: If the file does not contain valid JSON, or if its
            top-level value is not a JSON object. Both messages include
            ``path`` so the caller can tell which input file is at fault.
        OSError: If the file cannot be read.
    """
    raw = path.read_text(encoding="utf-8")
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError as err:
        # JSONDecodeError is itself a ValueError; re-raise with the file
        # name attached, since the decoder's own message does not carry it.
        raise ValueError(f"Invalid JSON at {path}: {err}") from err
    if not isinstance(parsed, dict):
        raise ValueError(f"Expected object JSON at {path}")
    return {str(key): value for key, value in parsed.items()}
|
||||
|
||||
|
||||
def main() -> int:
    """Run a latency profile and compare it against baseline thresholds.

    Reads the ``--baseline`` and ``--thresholds`` JSON files, runs the
    latency profile for ``--iterations`` iterations, writes the fresh
    profile to ``--out-current`` and evaluates the guardrails.

    Returns:
        ``1`` if any guardrail failure is reported (each one is printed),
        otherwise ``0``.
    """
    cli = argparse.ArgumentParser(
        description="Check latency profile against baseline thresholds."
    )
    cli.add_argument("--baseline", type=Path, required=True)
    cli.add_argument("--thresholds", type=Path, required=True)
    cli.add_argument("--iterations", type=int, default=600)
    cli.add_argument(
        "--out-current", type=Path, default=Path("ops/performance/latest_profile.json")
    )
    opts = cli.parse_args()

    baseline = _read_json(opts.baseline)
    thresholds = _read_json(opts.thresholds)
    current = run_latency_profile(iterations=opts.iterations)

    # The current profile is written before evaluation, so it exists on
    # disk whether or not the guardrails pass.
    out_path = opts.out_current
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(current, indent=2), encoding="utf-8")

    failures = evaluate_guardrails(baseline=baseline, current=current, thresholds=thresholds)
    if not failures:
        print("Latency guardrails passed.")
        return 0

    print("Latency guardrail failures:")
    for failure in failures:
        print(f"- {failure}")
    return 1


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
@@ -0,0 +1,54 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
|
||||
from arbitrade.perf.latency import run_latency_profile
|
||||
|
||||
|
||||
def _format_summary(profile: dict[str, object]) -> str:
    """Render the p95/p99 latency of every stage in a profile as text.

    Args:
        profile: Profile mapping. Its ``"scenarios"`` entry is expected to
            map scenario names to dicts holding a ``"stages"`` dict, whose
            values are dicts with ``"p95_ms"`` and ``"p99_ms"`` entries.

    Returns:
        ``"No scenarios found."`` when ``"scenarios"`` is missing or not a
        dict. Otherwise a header line, one line per scenario and one line
        per stage. Scenario or stage entries that are not dicts are
        skipped. A percentile that is missing, null or not convertible to
        ``float`` is shown as ``0.0000``.
    """
    scenarios = profile.get("scenarios")
    if not isinstance(scenarios, dict):
        return "No scenarios found."

    lines = ["Latency profiling summary:"]
    for scenario_name, payload in scenarios.items():
        if not isinstance(payload, dict):
            continue
        lines.append(f"- {scenario_name}")
        stages = payload.get("stages")
        if not isinstance(stages, dict):
            continue
        for stage_name, stage_payload in stages.items():
            if not isinstance(stage_payload, dict):
                continue
            percentiles = []
            for key in ("p95_ms", "p99_ms"):
                try:
                    percentiles.append(float(stage_payload.get(key, 0.0)))
                except (TypeError, ValueError, OverflowError):
                    # Malformed structure is skipped above rather than
                    # raised, so a malformed number gets the same 0.0 that
                    # a missing key gets instead of aborting the summary.
                    percentiles.append(0.0)
            p95, p99 = percentiles
            lines.append(f" - {stage_name}: p95={p95:.4f}ms p99={p99:.4f}ms")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def main() -> int:
    """Run the latency profile, print a summary and optionally save it.

    The profile is run for ``--iterations`` iterations and stamped with
    the current UTC time under ``"generated_at"``. When ``--output`` is
    given, the profile is also written there as indented JSON.

    Returns:
        Always ``0``.
    """
    cli = argparse.ArgumentParser(description="Profile synthetic latency scenarios.")
    cli.add_argument("--iterations", type=int, default=600)
    cli.add_argument("--output", type=Path, default=None)
    opts = cli.parse_args()

    profile = run_latency_profile(iterations=opts.iterations)
    profile["generated_at"] = datetime.now(UTC).isoformat()

    print(_format_summary(profile))

    destination = opts.output
    if destination is None:
        # Nothing to persist; the printed summary is the only output.
        return 0

    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(json.dumps(profile, indent=2), encoding="utf-8")
    print(f"Wrote profile JSON to {destination}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user