From f612c8533a08295119789493b2be2d2c883b0697 Mon Sep 17 00:00:00 2001
From: zwitschi
Date: Tue, 2 Jun 2026 08:44:10 +0200
Subject: [PATCH] feat: Add backtesting parameter sweep support and related functionality

---
 CHANGELOG.md                          |   3 +
 README.md                             |  22 +-
 scripts/backtest_sweep.py             | 165 ++++++++++
 src/arbitrade/backtesting/__init__.py |  18 ++
 src/arbitrade/backtesting/sweep.py    | 428 ++++++++++++++++++++++++++
 tests/unit/test_backtesting_sweep.py  | 111 +++++++
 6 files changed, 740 insertions(+), 7 deletions(-)
 create mode 100644 scripts/backtest_sweep.py
 create mode 100644 src/arbitrade/backtesting/sweep.py
 create mode 100644 tests/unit/test_backtesting_sweep.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index be09aa9..4298045 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,9 @@
 - Added synthetic latency profiler scenarios and CLI scripts for baseline generation and regression checks.
 - Added latency baseline/threshold artifacts and CI latency guardrail enforcement.
 - Added deterministic replay backtesting engine, CLI script, and unit coverage for JSONL event replay.
+- Added backtesting parameter sweep support (`scripts/backtest_sweep.py`) for theta, trade-capital, pair-universe, and staleness-threshold grid search.
+- Added persisted sweep artifacts with ranked in-sample/out-of-sample results and promotion-ready candidate reporting.
+- Added out-of-sample overfit guards via train/test time-window split and generalization-gap checks.
 - Added dashboard controls for tradable pair universe selection and strategy mode/parameter configuration.
 
 ### Changed
diff --git a/README.md b/README.md
index 2286834..9a31440 100644
--- a/README.md
+++ b/README.md
@@ -104,7 +104,7 @@ Minimum `.env` values:
 ```env
 APP_ENV=dev
 APP_HOST=0.0.0.0
-APP_PORT=8000
+APP_PORT=9090
 LOG_LEVEL=INFO
 LOG_JSON=true
 DUCKDB_PATH=./data/arbitrade.duckdb
@@ -132,8 +132,8 @@ python -m arbitrade.main
 
 Health endpoints:
 
-- HTML: `http://localhost:8000/`
-- JSON: `http://localhost:8000/health`
+- HTML: `http://localhost:9090/`
+- JSON: `http://localhost:9090/health`
 
 ## Database
 
@@ -283,12 +283,12 @@ Set these in Coolify application settings:
 - Build Command: leave empty.
 - Install Command: leave empty.
 - Start Command: leave empty unless you explicitly want to override the image default.
-- Port: `8000`
+- Port: `9090` (Coolify uses `8000` internally)
 
 ### 3) Configure health check and networking
 
 - Health Check Path: `/health`
-- Exposed Port: `8000`
+- Exposed Port: `9090`
 - Use Coolify-generated domain or attach your own domain.
 
 ### 4) Configure persistent storage
@@ -305,7 +305,7 @@ Add runtime environment variables in Coolify (UI: Environment Variables):
 
 - `APP_ENV=prod`
 - `APP_HOST=0.0.0.0`
-- `APP_PORT=8000`
+- `APP_PORT=9090`
 - `DUCKDB_PATH=/app/data/arbitrade.duckdb`
 - `LOG_LEVEL=INFO`
 - `LOG_JSON=true`
@@ -431,6 +431,12 @@ Run a deterministic replay backtest from a JSONL event stream:
 python scripts/backtest_replay.py --events path\to\replay.jsonl --starting-balances USD=1000.0
 ```
 
+Run parameter sweep with train/test split and promotion scoring:
+
+```powershell
+python scripts/backtest_sweep.py --events path\to\replay.jsonl --starting-balances USD=1000.0 --output ops/backtesting/parameter_sweep_results.json
+```
+
 Replay event format:
 
 ```json
@@ -447,7 +453,9 @@ Notes:
 - Events are replayed in timestamp order.
 - The replay engine reuses the production detector, pre-trade validation, trade limits, and execution sequencer.
 - The simulated execution path applies configurable slippage and execution latency so reports include deterministic trade/miss statistics.
-  Latency baseline and threshold artifacts:
+- Parameter sweep splits replay data into in-sample and out-of-sample windows, ranks configurations by out-of-sample score, and flags overfit via train/test generalization-gap checks.
+- Sweep output persists ranked combinations and promotion-ready candidates for paper-trading canary promotion decisions.
+- Latency baseline and threshold artifacts:
   - `ops/performance/latency_baseline.json`
   - `ops/performance/latency_thresholds.json`
 
diff --git a/scripts/backtest_sweep.py b/scripts/backtest_sweep.py
new file mode 100644
index 0000000..e7376a8
--- /dev/null
+++ b/scripts/backtest_sweep.py
@@ -0,0 +1,165 @@
+from __future__ import annotations
+
+import argparse
+from collections.abc import Mapping, Sequence
+from pathlib import Path
+
+from arbitrade.backtesting import load_replay_events
+from arbitrade.backtesting.sweep import (
+    PromotionCriteria,
+    SweepResult,
+    build_parameter_grid,
+    persist_sweep_results,
+    run_parameter_search,
+)
+from arbitrade.detection.graph import CurrencyGraph, TriangularCycle
+
+
+def _parse_balances(raw: str) -> Mapping[str, float]:
+    """Parse ``ASSET=AMOUNT`` pairs separated by commas into a balance map.
+
+    Raises:
+        ValueError: If an entry has no ``=`` separator, an empty asset name,
+            or an amount that ``float`` cannot parse.
+    """
+    balances: dict[str, float] = {}
+    for entry in raw.split(","):
+        stripped = entry.strip()
+        if not stripped:
+            continue
+        asset, separator, value = stripped.partition("=")
+        if not separator or not asset.strip():
+            raise ValueError(
+                f"invalid balance entry {stripped!r}; expected ASSET=AMOUNT")
+        balances[asset.strip().upper()] = float(value)
+    return balances
+
+
+def _parse_float_list(raw: str) -> list[float]:
+    """Parse a comma-separated string into a non-empty list of floats."""
+    values = [item.strip() for item in raw.split(",") if item.strip()]
+    if not values:
+        raise ValueError("expected at least one numeric value")
+    return [float(value) for value in values]
+
+
+def _parse_pair_universes(raw: str) -> list[tuple[str, ...]]:
+    """Parse ``;``-separated universes of ``|``-delimited pair symbols."""
+    universes: list[tuple[str, ...]] = []
+    for chunk in raw.split(";"):
+        symbols = tuple(item.strip().upper()
+                        for item in chunk.split("|") if item.strip())
+        if symbols:
+            universes.append(symbols)
+    if not universes:
+        raise ValueError("at least one pair universe must be provided")
+    return universes
+
+
+def _build_graph_from_symbols(symbols: Sequence[str]) -> dict[str, list[TriangularCycle]]:
+    """Return triangular cycles indexed by pair for the ``BASE/QUOTE`` symbols."""
+    graph = CurrencyGraph()
+    for symbol in symbols:
+        normalized = symbol.upper()
+        if "/" not in normalized:
+            continue
+        base, quote = normalized.split("/", 1)
+        graph.add_pair(base, quote, normalized)
+
+    cycles = graph.triangular_cycles()
+    return graph.index_cycles_by_pair(cycles)
+
+
+def _print_top_results(results: Sequence[SweepResult], *, limit: int = 5) -> None:
+    """Print a one-line summary for the first ``limit`` results."""
+    print(f"Top {min(limit, len(results))} result(s) by out-of-sample score:")
+    for index, result in enumerate(results[:limit], start=1):
+        print(
+            "- "
+            f"#{index} "
+            f"theta={result.parameters.min_profit_threshold:.6f}, "
+            f"capital={result.parameters.trade_capital:.2f}, "
+            f"pairs={','.join(result.parameters.pair_universe)}, "
+            f"staleness={result.parameters.staleness_threshold_seconds:.2f}s, "
+            f"test_score={result.test_score:.4f}, "
+            f"promotion_ready={result.promotion_ready}"
+        )
+
+
+def main() -> int:
+    """Run the sweep CLI: parse arguments, execute the grid, persist results."""
+    parser = argparse.ArgumentParser(
+        description="Run backtesting parameter sweep with train/test split.")
+    parser.add_argument("--events", type=Path, required=True)
+    parser.add_argument("--starting-balances", type=str, default="USD=1000.0")
+    parser.add_argument("--theta-values", type=str,
+                        default="0.0003,0.0005,0.0008")
+    parser.add_argument("--trade-capital-values",
+                        type=str, default="50,100,150")
+    parser.add_argument(
+        "--pair-universes",
+        type=str,
+        default="BTC/USD|ETH/BTC|ETH/USD",
+        help="Semicolon-separated universes, each with | delimited pairs",
+    )
+    parser.add_argument("--staleness-threshold-values",
+                        type=str, default="3,5,8")
+    parser.add_argument("--train-ratio", type=float, default=0.7)
+    parser.add_argument("--output", type=Path,
+                        default=Path("ops/backtesting/parameter_sweep_results.json"))
+
+    parser.add_argument("--min-test-realized-pnl-usd", type=float, default=0.0)
+    parser.add_argument("--min-test-win-rate", type=float, default=0.5)
+    parser.add_argument("--min-test-fill-rate", type=float, default=0.9)
+    parser.add_argument("--max-test-drawdown-usd", type=float, default=25.0)
+    parser.add_argument("--max-generalization-gap-ratio",
+                        type=float, default=0.5)
+
+    args = parser.parse_args()
+
+    events = load_replay_events(args.events)
+    symbols = sorted({event.symbol.upper() for event in events})
+    cycles_by_pair = _build_graph_from_symbols(symbols)
+    if not cycles_by_pair:
+        raise SystemExit(
+            "No triangular cycles found in supplied replay events")
+
+    grid = build_parameter_grid(
+        theta_values=_parse_float_list(args.theta_values),
+        trade_capital_values=_parse_float_list(args.trade_capital_values),
+        pair_universes=_parse_pair_universes(args.pair_universes),
+        staleness_threshold_values=_parse_float_list(
+            args.staleness_threshold_values),
+    )
+
+    artifacts = run_parameter_search(
+        events=events,
+        cycles_by_pair=cycles_by_pair,
+        parameter_grid=grid,
+        starting_balances=_parse_balances(args.starting_balances),
+        train_ratio=args.train_ratio,
+        promotion_criteria=PromotionCriteria(
+            min_test_realized_pnl_usd=args.min_test_realized_pnl_usd,
+            min_test_win_rate=args.min_test_win_rate,
+            min_test_fill_rate=args.min_test_fill_rate,
+            max_test_drawdown_usd=args.max_test_drawdown_usd,
+            max_generalization_gap_ratio=args.max_generalization_gap_ratio,
+        ),
+    )
+
+    persist_sweep_results(args.output, artifacts)
+
+    print(f"Completed sweep combinations: {len(artifacts.results)}")
+    print(f"Promotion-ready combinations: {len(artifacts.promoted)}")
+    print(f"Results written: {args.output}")
+
+    _print_top_results(artifacts.results)
+    if artifacts.promoted:
+        print("Promotion candidates (paper-trading canary):")
+        _print_top_results(artifacts.promoted)
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/src/arbitrade/backtesting/__init__.py b/src/arbitrade/backtesting/__init__.py
index a657f6c..e6cbc60 100644
--- a/src/arbitrade/backtesting/__init__.py
+++ b/src/arbitrade/backtesting/__init__.py
@@ -6,6 +6,16 @@ from arbitrade.backtesting.replay import (
     ReplayClock,
     load_replay_events,
 )
+from arbitrade.backtesting.sweep import (
+    PromotionCriteria,
+    SweepArtifacts,
+    SweepParameters,
+    SweepResult,
+    build_parameter_grid,
+    persist_sweep_results,
+    run_parameter_search,
+    split_events_time_windows,
+)
 
 __all__ = [
     "ReplayClock",
@@ -14,4 +24,12 @@ __all__ = [
     "BacktestReport",
     "BacktestReplayEngine",
     "load_replay_events",
+    "SweepParameters",
+    "SweepResult",
+    "SweepArtifacts",
+    "PromotionCriteria",
+    "split_events_time_windows",
+    "build_parameter_grid",
+    "run_parameter_search",
+    "persist_sweep_results",
 ]
diff --git a/src/arbitrade/backtesting/sweep.py b/src/arbitrade/backtesting/sweep.py
new file mode 100644
index 0000000..67c44a7
--- /dev/null
+++ b/src/arbitrade/backtesting/sweep.py
@@ -0,0 +1,428 @@
+from __future__ import annotations
+
+import asyncio
+from collections.abc import Mapping, Sequence
+from dataclasses import dataclass, replace
+from datetime import UTC, datetime
+from pathlib import Path
+
+import orjson
+
+from arbitrade.backtesting.replay import (
+    BacktestConfig,
+    BacktestReplayEngine,
+    BacktestReport,
+    ReplayBookEvent,
+)
+from arbitrade.detection.graph import TriangularCycle
+
+
+@dataclass(frozen=True, slots=True)
+class SweepParameters:
+    """One parameter combination evaluated by the sweep."""
+    min_profit_threshold: float
+    trade_capital: float
+    pair_universe: tuple[str, ...]
+    staleness_threshold_seconds: float
+
+
+@dataclass(frozen=True, slots=True)
+class PromotionCriteria:
+    """Thresholds a sweep result must satisfy to be flagged promotion-ready."""
+    min_test_realized_pnl_usd: float = 0.0
+    min_test_win_rate: float = 0.5
+    min_test_fill_rate: float = 0.9
+    max_test_drawdown_usd: float = 25.0
+    max_generalization_gap_ratio: float = 0.5
+
+
+@dataclass(frozen=True, slots=True)
+class SweepResult:
+    """Train/test reports, scores and promotion verdict for one combination."""
+    parameters: SweepParameters
+    train_report: BacktestReport
+    test_report: BacktestReport
+    train_score: float
+    test_score: float
+    generalization_gap_ratio: float
+    overfit_detected: bool
+    promotion_ready: bool
+    promotion_reasons: tuple[str, ...]
+    train_event_count: int
+    test_event_count: int
+
+
+@dataclass(frozen=True, slots=True)
+class SweepArtifacts:
+    """Ranked sweep results plus the train/test timestamp windows."""
+    results: tuple[SweepResult, ...]
+    promoted: tuple[SweepResult, ...]
+    train_window: tuple[datetime, datetime] | None
+    test_window: tuple[datetime, datetime] | None
+
+
+def split_events_time_windows(
+    events: Sequence[ReplayBookEvent],
+    *,
+    train_ratio: float,
+) -> tuple[list[ReplayBookEvent], list[ReplayBookEvent]]:
+    """Split events chronologically into train and test lists.
+
+    Events are sorted by ``occurred_at`` first (stable, so ties keep their
+    input order); the test window therefore never starts before the train
+    window ends, even when the caller passes unordered events.
+
+    Raises:
+        ValueError: If ``train_ratio`` is not strictly between 0 and 1, or
+            fewer than two events are supplied.
+    """
+    if train_ratio <= 0.0 or train_ratio >= 1.0:
+        raise ValueError("train_ratio must be between 0 and 1")
+    if len(events) < 2:
+        raise ValueError("at least two events are required for time split")
+
+    ordered = sorted(events, key=lambda event: event.occurred_at)
+    split_index = max(1, min(len(ordered) - 1, int(len(ordered) * train_ratio)))
+    return ordered[:split_index], ordered[split_index:]
+
+
+def build_parameter_grid(
+    *,
+    theta_values: Sequence[float],
+    trade_capital_values: Sequence[float],
+    pair_universes: Sequence[Sequence[str]],
+    staleness_threshold_values: Sequence[float],
+) -> list[SweepParameters]:
+    """Expand the cartesian product of the supplied values into a grid.
+
+    Pair universes are upper-cased, de-duplicated and sorted.
+
+    Raises:
+        ValueError: If any of the value sequences is empty.
+    """
+    if not theta_values:
+        raise ValueError("theta_values must not be empty")
+    if not trade_capital_values:
+        raise ValueError("trade_capital_values must not be empty")
+    if not pair_universes:
+        raise ValueError("pair_universes must not be empty")
+    if not staleness_threshold_values:
+        raise ValueError("staleness_threshold_values must not be empty")
+
+    grid: list[SweepParameters] = []
+    for theta in theta_values:
+        for trade_capital in trade_capital_values:
+            for pair_universe in pair_universes:
+                normalized_universe = tuple(
+                    sorted({pair.upper() for pair in pair_universe}))
+                for staleness_threshold in staleness_threshold_values:
+                    grid.append(
+                        SweepParameters(
+                            min_profit_threshold=float(theta),
+                            trade_capital=float(trade_capital),
+                            pair_universe=normalized_universe,
+                            staleness_threshold_seconds=float(
+                                staleness_threshold),
+                        )
+                    )
+    return grid
+
+
+def _filter_events_for_parameters(
+    events: Sequence[ReplayBookEvent],
+    *,
+    pair_universe: set[str],
+    staleness_threshold_seconds: float,
+) -> list[ReplayBookEvent]:
+    """Keep in-universe events that arrive within the staleness threshold.
+
+    The first event of each symbol is always kept; later ones are kept only
+    when the gap to that symbol's previous event does not exceed the limit.
+    """
+    if staleness_threshold_seconds <= 0.0:
+        raise ValueError("staleness_threshold_seconds must be > 0")
+
+    filtered: list[ReplayBookEvent] = []
+    last_seen_by_symbol: dict[str, datetime] = {}
+
+    for event in events:
+        symbol = event.symbol.upper()
+        if symbol not in pair_universe:
+            continue
+
+        previous = last_seen_by_symbol.get(symbol)
+        last_seen_by_symbol[symbol] = event.occurred_at
+        if previous is None:
+            filtered.append(event)
+            continue
+
+        gap_seconds = (event.occurred_at - previous).total_seconds()
+        if gap_seconds <= staleness_threshold_seconds:
+            filtered.append(event)
+
+    return filtered
+
+
+def _restrict_cycles_by_pair(
+    cycles_by_pair: Mapping[str, list[TriangularCycle]],
+    *,
+    pair_universe: set[str],
+) -> dict[str, list[TriangularCycle]]:
+    """Keep only cycles whose pairs all belong to ``pair_universe``."""
+    restricted: dict[str, list[TriangularCycle]] = {}
+    for pair_symbol, cycles in cycles_by_pair.items():
+        normalized_pair = pair_symbol.upper()
+        if normalized_pair not in pair_universe:
+            continue
+
+        kept = [cycle for cycle in cycles if all(
+            pair.upper() in pair_universe for pair in cycle.pairs)]
+        if kept:
+            restricted[normalized_pair] = kept
+    return restricted
+
+
+def _score_report(report: BacktestReport) -> float:
+    """Score a report: PnL plus win/fill-rate bonuses minus max drawdown."""
+    win_rate_bonus = (report.win_rate or 0.0) * 100.0
+    fill_rate_bonus = (report.fill_rate or 0.0) * 50.0
+    return report.realized_pnl_usd + win_rate_bonus + fill_rate_bonus - report.max_drawdown_usd
+
+
+def _safe_ratio(numerator: float, denominator: float) -> float:
+    """Return a non-negative ratio, tolerating a non-positive denominator."""
+    if denominator <= 0.0:
+        return 0.0 if numerator <= 0.0 else 1.0
+    return max(0.0, numerator / denominator)
+
+
+def _evaluate_promotion(
+    *,
+    result: SweepResult,
+    criteria: PromotionCriteria,
+) -> tuple[bool, tuple[str, ...]]:
+    """Return the promotion verdict and the reasons for any rejection."""
+    reasons: list[str] = []
+    test = result.test_report
+
+    if test.realized_pnl_usd < criteria.min_test_realized_pnl_usd:
+        reasons.append(
+            "test_realized_pnl_below_threshold"
+        )
+    if (test.win_rate or 0.0) < criteria.min_test_win_rate:
+        reasons.append("test_win_rate_below_threshold")
+    if (test.fill_rate or 0.0) < criteria.min_test_fill_rate:
+        reasons.append("test_fill_rate_below_threshold")
+    if test.max_drawdown_usd > criteria.max_test_drawdown_usd:
+        reasons.append("test_drawdown_above_threshold")
+    if result.generalization_gap_ratio > criteria.max_generalization_gap_ratio:
+        reasons.append("generalization_gap_above_threshold")
+
+    return (not reasons), tuple(reasons)
+
+
+def _run_backtest(
+    *,
+    events: Sequence[ReplayBookEvent],
+    cycles_by_pair: Mapping[str, list[TriangularCycle]],
+    available_pairs: Sequence[str],
+    config: BacktestConfig,
+    starting_balances: Mapping[str, float],
+) -> BacktestReport:
+    """Replay ``events`` through a fresh engine and return its report."""
+    started_at = events[0].occurred_at if events else datetime.now(UTC)
+    engine = BacktestReplayEngine(
+        cycles_by_pair=cycles_by_pair,
+        available_pairs=available_pairs,
+        config=config,
+        started_at=started_at,
+    )
+    return asyncio.run(engine.run(events, starting_balances=starting_balances))
+
+
+def run_parameter_search(
+    *,
+    events: Sequence[ReplayBookEvent],
+    cycles_by_pair: Mapping[str, list[TriangularCycle]],
+    parameter_grid: Sequence[SweepParameters],
+    starting_balances: Mapping[str, float],
+    train_ratio: float,
+    promotion_criteria: PromotionCriteria | None = None,
+    max_concurrent_trades: int = 1,
+    max_depth_levels: int = 10,
+    quote_asset: str = "USD",
+) -> SweepArtifacts:
+    """Backtest every grid combination on the train and test windows.
+
+    Combinations with no surviving events in either window, or with no
+    cycle inside their pair universe, are skipped. Results and promoted
+    candidates are returned sorted by descending out-of-sample score.
+    """
+    criteria = promotion_criteria or PromotionCriteria()
+    train_events, test_events = split_events_time_windows(
+        events, train_ratio=train_ratio)
+
+    results: list[SweepResult] = []
+    promoted: list[SweepResult] = []
+
+    for parameters in parameter_grid:
+        allowed_pairs = set(parameters.pair_universe)
+        filtered_train = _filter_events_for_parameters(
+            train_events,
+            pair_universe=allowed_pairs,
+            staleness_threshold_seconds=parameters.staleness_threshold_seconds,
+        )
+        filtered_test = _filter_events_for_parameters(
+            test_events,
+            pair_universe=allowed_pairs,
+            staleness_threshold_seconds=parameters.staleness_threshold_seconds,
+        )
+
+        if not filtered_train or not filtered_test:
+            continue
+
+        restricted_cycles = _restrict_cycles_by_pair(
+            cycles_by_pair,
+            pair_universe=allowed_pairs,
+        )
+        if not restricted_cycles:
+            continue
+
+        config = BacktestConfig(
+            min_profit_threshold=parameters.min_profit_threshold,
+            trade_capital=parameters.trade_capital,
+            max_concurrent_trades=max_concurrent_trades,
+            max_depth_levels=max_depth_levels,
+            quote_asset=quote_asset,
+        )
+
+        train_report = _run_backtest(
+            events=filtered_train,
+            cycles_by_pair=restricted_cycles,
+            available_pairs=sorted(allowed_pairs),
+            config=config,
+            starting_balances=starting_balances,
+        )
+        test_report = _run_backtest(
+            events=filtered_test,
+            cycles_by_pair=restricted_cycles,
+            available_pairs=sorted(allowed_pairs),
+            config=config,
+            starting_balances=starting_balances,
+        )
+
+        train_score = _score_report(train_report)
+        test_score = _score_report(test_report)
+        score_drop = max(0.0, train_score - test_score)
+        generalization_gap_ratio = _safe_ratio(score_drop, abs(train_score))
+        overfit_detected = generalization_gap_ratio > criteria.max_generalization_gap_ratio
+
+        base_result = SweepResult(
+            parameters=parameters,
+            train_report=train_report,
+            test_report=test_report,
+            train_score=train_score,
+            test_score=test_score,
+            generalization_gap_ratio=generalization_gap_ratio,
+            overfit_detected=overfit_detected,
+            promotion_ready=False,
+            promotion_reasons=(),
+            train_event_count=len(filtered_train),
+            test_event_count=len(filtered_test),
+        )
+        promotion_ready, promotion_reasons = _evaluate_promotion(
+            result=base_result, criteria=criteria)
+        completed_result = replace(
+            base_result,
+            promotion_ready=promotion_ready,
+            promotion_reasons=promotion_reasons,
+        )
+
+        results.append(completed_result)
+        if completed_result.promotion_ready:
+            promoted.append(completed_result)
+
+    results.sort(key=lambda item: item.test_score, reverse=True)
+    promoted.sort(key=lambda item: item.test_score, reverse=True)
+
+    train_window: tuple[datetime, datetime] | None = None
+    test_window: tuple[datetime, datetime] | None = None
+    if train_events:
+        train_window = (train_events[0].occurred_at,
+                        train_events[-1].occurred_at)
+    if test_events:
+        test_window = (test_events[0].occurred_at, test_events[-1].occurred_at)
+
+    return SweepArtifacts(
+        results=tuple(results),
+        promoted=tuple(promoted),
+        train_window=train_window,
+        test_window=test_window,
+    )
+
+
+def _report_to_dict(report: BacktestReport) -> dict[str, object]:
+    """Convert a report into a plain dict for persistence."""
+    return {
+        "started_at": report.started_at.isoformat(),
+        "finished_at": report.finished_at.isoformat(),
+        "processed_events": report.processed_events,
+        "opportunities_seen": report.opportunities_seen,
+        "trades_executed": report.trades_executed,
+        "win_rate": report.win_rate,
+        "fill_rate": report.fill_rate,
+        "realized_pnl_usd": report.realized_pnl_usd,
+        "max_drawdown_usd": report.max_drawdown_usd,
+        "miss_reasons": dict(report.miss_reasons),
+        "execution_latency_p50_ms": report.execution_latency_p50_ms,
+        "execution_latency_p95_ms": report.execution_latency_p95_ms,
+        "execution_latency_p99_ms": report.execution_latency_p99_ms,
+    }
+
+
+def persist_sweep_results(path: Path, artifacts: SweepArtifacts) -> None:
+    """Write the sweep artifacts to ``path`` as indented, key-sorted JSON."""
+    payload = {
+        "generated_at": datetime.now(UTC).isoformat(),
+        "train_window": (
+            {
+                "started_at": artifacts.train_window[0].isoformat(),
+                "finished_at": artifacts.train_window[1].isoformat(),
+            }
+            if artifacts.train_window is not None
+            else None
+        ),
+        "test_window": (
+            {
+                "started_at": artifacts.test_window[0].isoformat(),
+                "finished_at": artifacts.test_window[1].isoformat(),
+            }
+            if artifacts.test_window is not None
+            else None
+        ),
+        "results": [
+            {
+                "parameters": {
+                    "min_profit_threshold": result.parameters.min_profit_threshold,
+                    "trade_capital": result.parameters.trade_capital,
+                    "pair_universe": list(result.parameters.pair_universe),
+                    "staleness_threshold_seconds": result.parameters.staleness_threshold_seconds,
+                },
+                "train_report": _report_to_dict(result.train_report),
+                "test_report": _report_to_dict(result.test_report),
+                "train_score": result.train_score,
+                "test_score": result.test_score,
+                "generalization_gap_ratio": result.generalization_gap_ratio,
+                "overfit_detected": result.overfit_detected,
+                "promotion_ready": result.promotion_ready,
+                "promotion_reasons": list(result.promotion_reasons),
+                "train_event_count": result.train_event_count,
+                "test_event_count": result.test_event_count,
+            }
+            for result in artifacts.results
+        ],
+    }
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_bytes(orjson.dumps(
+        payload, option=orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS))
diff --git a/tests/unit/test_backtesting_sweep.py b/tests/unit/test_backtesting_sweep.py
new file mode 100644
index 0000000..974e5c0
--- /dev/null
+++ b/tests/unit/test_backtesting_sweep.py
@@ -0,0 +1,111 @@
+from __future__ import annotations
+
+from datetime import UTC, datetime, timedelta
+
+from arbitrade.backtesting.replay import ReplayBookEvent
+from arbitrade.backtesting.sweep import (
+    PromotionCriteria,
+    SweepResult,
+    build_parameter_grid,
+    run_parameter_search,
+    split_events_time_windows,
+)
+from arbitrade.detection.graph import CurrencyGraph
+from arbitrade.exchange.models import BookLevel
+
+
+def _build_cycles() -> dict[str, list]:
+    graph = CurrencyGraph()
+    graph.add_pair("USD", "BTC", "BTC/USD")
+    graph.add_pair("BTC", "ETH", "ETH/BTC")
+    graph.add_pair("ETH", "USD", "ETH/USD")
+    return graph.index_cycles_by_pair(graph.triangular_cycles())
+
+
+def _events() -> list[ReplayBookEvent]:
+    base_time = datetime(2026, 6, 1, 12, 0, tzinfo=UTC)
+    rows: list[ReplayBookEvent] = []
+    for index in range(12):
+        tick = base_time + timedelta(seconds=index)
+        rows.extend(
+            [
+                ReplayBookEvent(
+                    occurred_at=tick,
+                    symbol="BTC/USD",
+                    bids=(BookLevel(price=99.5, volume=10.0),),
+                    asks=(BookLevel(price=100.0, volume=10.0),),
+                ),
+                ReplayBookEvent(
+                    occurred_at=tick,
+                    symbol="ETH/BTC",
+                    bids=(BookLevel(price=0.051, volume=10.0),),
+                    asks=(BookLevel(price=0.050, volume=10.0),),
+                ),
+                ReplayBookEvent(
+                    occurred_at=tick,
+                    symbol="ETH/USD",
+                    bids=(BookLevel(price=110.0, volume=10.0),),
+                    asks=(BookLevel(price=110.5, volume=10.0),),
+                ),
+            ]
+        )
+    return rows
+
+
+def test_split_events_time_windows_returns_non_empty_train_and_test() -> None:
+    train, test = split_events_time_windows(_events(), train_ratio=0.7)
+
+    assert train
+    assert test
+    assert train[-1].occurred_at <= test[0].occurred_at
+
+
+def test_split_events_time_windows_sorts_unordered_events() -> None:
+    shuffled = list(reversed(_events()))
+
+    train, test = split_events_time_windows(shuffled, train_ratio=0.5)
+
+    assert max(event.occurred_at for event in train) <= min(
+        event.occurred_at for event in test)
+
+
+def test_build_parameter_grid_expands_combinations() -> None:
+    grid = build_parameter_grid(
+        theta_values=[0.0005, 0.001],
+        trade_capital_values=[100.0],
+        pair_universes=[["BTC/USD", "ETH/BTC", "ETH/USD"]],
+        staleness_threshold_values=[3.0, 5.0],
+    )
+
+    assert len(grid) == 4
+
+
+def test_run_parameter_search_produces_ranked_results_with_overfit_guard() -> None:
+    artifacts = run_parameter_search(
+        events=_events(),
+        cycles_by_pair=_build_cycles(),
+        parameter_grid=build_parameter_grid(
+            theta_values=[0.0005, 0.001],
+            trade_capital_values=[75.0, 100.0],
+            pair_universes=[["BTC/USD", "ETH/BTC", "ETH/USD"]],
+            staleness_threshold_values=[5.0],
+        ),
+        starting_balances={"USD": 2000.0},
+        train_ratio=0.7,
+        promotion_criteria=PromotionCriteria(
+            min_test_realized_pnl_usd=-1000.0,
+            min_test_win_rate=0.0,
+            min_test_fill_rate=0.0,
+            max_test_drawdown_usd=1_000_000.0,
+            max_generalization_gap_ratio=0.9,
+        ),
+    )
+
+    assert artifacts.results
+    assert artifacts.results[0].test_score >= artifacts.results[-1].test_score
+
+    first: SweepResult = artifacts.results[0]
+    assert first.train_event_count > 0
+    assert first.test_event_count > 0
+    assert first.generalization_gap_ratio >= 0.0
+    assert isinstance(first.promotion_ready, bool)