feat: Add backtesting parameter sweep support and related functionality

2026-06-02 08:44:10 +02:00
parent 8ef8dc801d
commit f612c8533a
6 changed files with 685 additions and 7 deletions
@@ -18,6 +18,9 @@
 - Added synthetic latency profiler scenarios and CLI scripts for baseline generation and regression checks.
 - Added latency baseline/threshold artifacts and CI latency guardrail enforcement.
 - Added deterministic replay backtesting engine, CLI script, and unit coverage for JSONL event replay.
 - Added backtesting parameter sweep support (`scripts/backtest_sweep.py`) for theta, trade-capital, pair-universe, and staleness-threshold grid search.
 - Added persisted sweep artifacts with ranked in-sample/out-of-sample results and promotion-ready candidate reporting.
 - Added out-of-sample overfit guards via train/test time-window split and generalization-gap checks.
 - Added dashboard controls for tradable pair universe selection and strategy mode/parameter configuration.
 ### Changed
@@ -104,7 +104,7 @@ Minimum `.env` values:
 ```env
 APP_ENV=dev
 APP_HOST=0.0.0.0
-APP_PORT=8000
+APP_PORT=9090
 LOG_LEVEL=INFO
 LOG_JSON=true
 DUCKDB_PATH=./data/arbitrade.duckdb
@@ -132,8 +132,8 @@ python -m arbitrade.main
 Health endpoints:
- HTML: `http://localhost:8000/`
+- HTML: `http://localhost:9090/`
- JSON: `http://localhost:8000/health`
+- JSON: `http://localhost:9090/health`
 ## Database
@@ -283,12 +283,12 @@ Set these in Coolify application settings:
 - Build Command: leave empty.
 - Install Command: leave empty.
 - Start Command: leave empty unless you explicitly want to override the image default.
- Port: `8000`
+- Port: `9090` (Coolify uses `8000` internally)
 ### 3) Configure health check and networking
 - Health Check Path: `/health`
- Exposed Port: `8000`
+- Exposed Port: `9090`
 - Use Coolify-generated domain or attach your own domain.
 ### 4) Configure persistent storage
@@ -305,7 +305,7 @@ Add runtime environment variables in Coolify (UI: Environment Variables):
 - `APP_ENV=prod`
 - `APP_HOST=0.0.0.0`
- `APP_PORT=8000`
+- `APP_PORT=9090`
 - `DUCKDB_PATH=/app/data/arbitrade.duckdb`
 - `LOG_LEVEL=INFO`
 - `LOG_JSON=true`
@@ -431,6 +431,12 @@ Run a deterministic replay backtest from a JSONL event stream:
 python scripts/backtest_replay.py --events path\to\replay.jsonl --starting-balances USD=1000.0
 ```
 Run a parameter sweep with a train/test split and promotion scoring:
 ```powershell
 python scripts/backtest_sweep.py --events path\to\replay.jsonl --starting-balances USD=1000.0 --output ops/backtesting/parameter_sweep_results.json
 ```
 Replay event format:
 ```json
@@ -447,7 +453,9 @@ Notes:
 - Events are replayed in timestamp order.
 - The replay engine reuses the production detector, pre-trade validation, trade limits, and execution sequencer.
 - The simulated execution path applies configurable slippage and execution latency so reports include deterministic trade/miss statistics.
-  Latency baseline and threshold artifacts:
+- Parameter sweep splits replay data into in-sample and out-of-sample windows, ranks configurations by out-of-sample score, and flags overfit via train/test generalization-gap checks.
 - Sweep output persists ranked combinations and promotion-ready candidates for paper-trading canary promotion decisions.
 - Latency baseline and threshold artifacts:
 - `ops/performance/latency_baseline.json`
 - `ops/performance/latency_thresholds.json`
@@ -0,0 +1,151 @@
 from __future__ import annotations
 import argparse
 from collections.abc import Mapping, Sequence
 from pathlib import Path
 from arbitrade.backtesting import load_replay_events
 from arbitrade.backtesting.sweep import (
    PromotionCriteria,
    SweepResult,
    build_parameter_grid,
    persist_sweep_results,
    run_parameter_search,
 )
 from arbitrade.detection.graph import CurrencyGraph, TriangularCycle
 def _parse_balances(raw: str) -> Mapping[str, float]:
    balances: dict[str, float] = {}
    for entry in raw.split(","):
        stripped = entry.strip()
        if not stripped:
            continue
        asset, value = stripped.split("=", 1)
        balances[asset.strip().upper()] = float(value)
    return balances
 def _parse_float_list(raw: str) -> list[float]:
    values = [item.strip() for item in raw.split(",") if item.strip()]
    if not values:
        raise ValueError("expected at least one numeric value")
    return [float(value) for value in values]
 def _parse_pair_universes(raw: str) -> list[tuple[str, ...]]:
    universes: list[tuple[str, ...]] = []
    for chunk in raw.split(";"):
        symbols = tuple(item.strip().upper()
                        for item in chunk.split("|") if item.strip())
        if symbols:
            universes.append(symbols)
    if not universes:
        raise ValueError("at least one pair universe must be provided")
    return universes
 def _build_graph_from_symbols(symbols: Sequence[str]) -> dict[str, list[TriangularCycle]]:
    """Build a currency graph from pair symbols and index its cycles by pair.

    Each symbol is upper-cased and split at its first ``/``; the two parts
    and the full symbol are passed to ``CurrencyGraph.add_pair``. Symbols
    without a ``/`` are skipped.

    Returns:
        The result of ``index_cycles_by_pair`` applied to the graph's
        triangular cycles.
    """
    currency_graph = CurrencyGraph()
    for raw_symbol in symbols:
        pair_name = raw_symbol.upper()
        left_asset, slash, right_asset = pair_name.partition("/")
        if slash:
            currency_graph.add_pair(left_asset, right_asset, pair_name)
    return currency_graph.index_cycles_by_pair(
        currency_graph.triangular_cycles())
 def _print_top_results(results: Sequence[SweepResult], *, limit: int = 5) -> None:
    print(f"Top {min(limit, len(results))} result(s) by out-of-sample score:")
    for index, result in enumerate(results[:limit], start=1):
        print(
            "- "
            f"#{index} "
            f"theta={result.parameters.min_profit_threshold:.6f}, "
            f"capital={result.parameters.trade_capital:.2f}, "
            f"pairs={','.join(result.parameters.pair_universe)}, "
            f"staleness={result.parameters.staleness_threshold_seconds:.2f}s, "
            f"test_score={result.test_score:.4f}, "
            f"promotion_ready={result.promotion_ready}"
        )
 def main() -> int:
    """Run the parameter-sweep CLI end to end.

    Parses the command line, loads the replay events, derives triangular
    cycles from the symbols seen in those events, evaluates the parameter
    grid, writes the results to ``--output`` and prints a short summary.

    Returns:
        ``0`` after the sweep has been persisted and summarised.

    Raises:
        SystemExit: If the replay events contain no triangular cycle.
    """
    cli = argparse.ArgumentParser(
        description="Run backtesting parameter sweep with train/test split.")
    cli.add_argument("--events", type=Path, required=True)
    cli.add_argument("--starting-balances", type=str, default="USD=1000.0")
    cli.add_argument(
        "--theta-values", type=str, default="0.0003,0.0005,0.0008")
    cli.add_argument(
        "--trade-capital-values", type=str, default="50,100,150")
    cli.add_argument(
        "--pair-universes",
        type=str,
        default="BTC/USD|ETH/BTC|ETH/USD",
        help="Semicolon-separated universes, each with | delimited pairs",
    )
    cli.add_argument(
        "--staleness-threshold-values", type=str, default="3,5,8")
    cli.add_argument("--train-ratio", type=float, default=0.7)
    cli.add_argument(
        "--output",
        type=Path,
        default=Path("ops/backtesting/parameter_sweep_results.json"),
    )
    # Promotion thresholds share the same shape, so register them together
    # (registration order is kept so --help lists them as before).
    for flag, fallback in (
        ("--min-test-realized-pnl-usd", 0.0),
        ("--min-test-win-rate", 0.5),
        ("--min-test-fill-rate", 0.9),
        ("--max-test-drawdown-usd", 25.0),
        ("--max-generalization-gap-ratio", 0.5),
    ):
        cli.add_argument(flag, type=float, default=fallback)
    options = cli.parse_args()

    replay_events = load_replay_events(options.events)
    observed_symbols = sorted(
        {event.symbol.upper() for event in replay_events})
    cycle_index = _build_graph_from_symbols(observed_symbols)
    if not cycle_index:
        raise SystemExit(
            "No triangular cycles found in supplied replay events")

    thetas = _parse_float_list(options.theta_values)
    capitals = _parse_float_list(options.trade_capital_values)
    universes = _parse_pair_universes(options.pair_universes)
    staleness_thresholds = _parse_float_list(
        options.staleness_threshold_values)
    parameter_grid = build_parameter_grid(
        theta_values=thetas,
        trade_capital_values=capitals,
        pair_universes=universes,
        staleness_threshold_values=staleness_thresholds,
    )

    balances = _parse_balances(options.starting_balances)
    criteria = PromotionCriteria(
        min_test_realized_pnl_usd=options.min_test_realized_pnl_usd,
        min_test_win_rate=options.min_test_win_rate,
        min_test_fill_rate=options.min_test_fill_rate,
        max_test_drawdown_usd=options.max_test_drawdown_usd,
        max_generalization_gap_ratio=options.max_generalization_gap_ratio,
    )
    sweep = run_parameter_search(
        events=replay_events,
        cycles_by_pair=cycle_index,
        parameter_grid=parameter_grid,
        starting_balances=balances,
        train_ratio=options.train_ratio,
        promotion_criteria=criteria,
    )
    persist_sweep_results(options.output, sweep)

    print(f"Completed sweep combinations: {len(sweep.results)}")
    print(f"Promotion-ready combinations: {len(sweep.promoted)}")
    print(f"Results written: {options.output}")
    _print_top_results(sweep.results)
    if sweep.promoted:
        print("Promotion candidates (paper-trading canary):")
        _print_top_results(sweep.promoted)
    return 0
 # Script entry point: run the CLI only when executed directly and use
 # main()'s return value as the process exit status.
 if __name__ == "__main__":
    raise SystemExit(main())
@@ -6,6 +6,16 @@ from arbitrade.backtesting.replay import (
    ReplayClock,
    load_replay_events,
 )
 from arbitrade.backtesting.sweep import (
    PromotionCriteria,
    SweepArtifacts,
    SweepParameters,
    SweepResult,
    build_parameter_grid,
    persist_sweep_results,
    run_parameter_search,
    split_events_time_windows,
 )
 __all__ = [
    "ReplayClock",
@@ -14,4 +24,12 @@ __all__ = [
    "BacktestReport",
    "BacktestReplayEngine",
    "load_replay_events",
    "SweepParameters",
    "SweepResult",
    "SweepArtifacts",
    "PromotionCriteria",
    "split_events_time_windows",
    "build_parameter_grid",
    "run_parameter_search",
    "persist_sweep_results",
 ]
@@ -0,0 +1,396 @@
 from __future__ import annotations
 import asyncio
 from collections.abc import Mapping, Sequence
 from dataclasses import dataclass
 from datetime import UTC, datetime
 from pathlib import Path
 import orjson
 from arbitrade.backtesting.replay import (
    BacktestConfig,
    BacktestReplayEngine,
    BacktestReport,
    ReplayBookEvent,
 )
 from arbitrade.detection.graph import TriangularCycle
@dataclass(frozen=True, slots=True)
 class SweepParameters:
    """One point of the sweep grid, evaluated by a train and a test backtest."""

    # Passed to BacktestConfig.min_profit_threshold ("theta" in the CLI).
    min_profit_threshold: float
    # Passed to BacktestConfig.trade_capital.
    trade_capital: float
    # Pair symbols the run is restricted to; events and cycles touching
    # other pairs are filtered out before the backtest.
    pair_universe: tuple[str, ...]
    # Largest allowed gap (seconds) between consecutive events of the same
    # symbol; an event arriving after a longer gap is dropped.
    staleness_threshold_seconds: float
@dataclass(frozen=True, slots=True)
 class PromotionCriteria:
    """Thresholds a sweep result must satisfy to be marked promotion-ready."""

    # Test realized PnL below this value fails the check.
    min_test_realized_pnl_usd: float = 0.0
    # Test win rate below this value (a missing rate counts as 0.0) fails.
    min_test_win_rate: float = 0.5
    # Test fill rate below this value (a missing rate counts as 0.0) fails.
    min_test_fill_rate: float = 0.9
    # Test max drawdown above this value fails the check.
    max_test_drawdown_usd: float = 25.0
    # Generalization-gap ratio above this value fails the check and is
    # also what flags a result as overfit.
    max_generalization_gap_ratio: float = 0.5
@dataclass(frozen=True, slots=True)
 class SweepResult:
    """Outcome of evaluating one SweepParameters combination."""

    # Grid point that produced this result.
    parameters: SweepParameters
    # Backtest reports for the filtered train and test events.
    train_report: BacktestReport
    test_report: BacktestReport
    # Scores computed by _score_report for the two reports.
    train_score: float
    test_score: float
    # max(0, train_score - test_score) relative to abs(train_score),
    # computed through _safe_ratio.
    generalization_gap_ratio: float
    # True when the gap ratio exceeds the criteria's maximum.
    overfit_detected: bool
    # True when no promotion check failed.
    promotion_ready: bool
    # Identifiers of the failed promotion checks; empty when ready.
    promotion_reasons: tuple[str, ...]
    # Number of events left after pair-universe and staleness filtering.
    train_event_count: int
    test_event_count: int
@dataclass(frozen=True, slots=True)
 class SweepArtifacts:
    """Everything produced by one run_parameter_search call."""

    # All evaluated combinations, sorted by test_score descending.
    results: tuple[SweepResult, ...]
    # The promotion-ready subset of results, sorted the same way.
    promoted: tuple[SweepResult, ...]
    # (occurred_at of first event, occurred_at of last event) of the
    # unfiltered train / test split, or None when that split is empty.
    train_window: tuple[datetime, datetime] | None
    test_window: tuple[datetime, datetime] | None
 def split_events_time_windows(
    events: Sequence[ReplayBookEvent],
    *,
    train_ratio: float,
 ) -> tuple[list[ReplayBookEvent], list[ReplayBookEvent]]:
    """Split events chronologically into ``(train, test)`` lists.

    Events are ordered by ``occurred_at`` before splitting, so every train
    event is at or before every test event even when the input is not
    sorted. The sort is stable: events with equal timestamps keep their
    input order, and already-sorted input is split exactly by position.

    Args:
        events: Replay events; each must expose ``occurred_at``.
        train_ratio: Fraction of events assigned to the train window,
            strictly between 0 and 1.

    Returns:
        Two non-empty lists: the earlier (train) and later (test) events.

    Raises:
        ValueError: If ``train_ratio`` is not strictly between 0 and 1
            (including NaN) or fewer than two events are supplied.
    """
    # A chained comparison is False for NaN, so NaN is rejected here with
    # the intended message instead of failing later inside int().
    if not 0.0 < train_ratio < 1.0:
        raise ValueError("train_ratio must be between 0 and 1")
    if len(events) < 2:
        raise ValueError("at least two events are required for time split")
    ordered = sorted(events, key=lambda event: event.occurred_at)
    # Clamp so both windows always hold at least one event.
    split_index = max(
        1, min(len(ordered) - 1, int(len(ordered) * train_ratio)))
    return ordered[:split_index], ordered[split_index:]
 def build_parameter_grid(
    *,
    theta_values: Sequence[float],
    trade_capital_values: Sequence[float],
    pair_universes: Sequence[Sequence[str]],
    staleness_threshold_values: Sequence[float],
 ) -> list[SweepParameters]:
    """Expand the four value axes into every parameter combination.

    Combinations are produced with theta as the outermost axis, then trade
    capital, then pair universe, then staleness threshold. Each pair
    universe is upper-cased, de-duplicated and sorted.

    Raises:
        ValueError: If any of the four axes is empty.
    """
    axis_checks = (
        (theta_values, "theta_values must not be empty"),
        (trade_capital_values, "trade_capital_values must not be empty"),
        (pair_universes, "pair_universes must not be empty"),
        (staleness_threshold_values,
         "staleness_threshold_values must not be empty"),
    )
    for axis_values, complaint in axis_checks:
        if not axis_values:
            raise ValueError(complaint)

    def canonical_universe(members: Sequence[str]) -> tuple[str, ...]:
        # Upper-case, de-duplicate and sort the pairs of one universe.
        return tuple(sorted({member.upper() for member in members}))

    return [
        SweepParameters(
            min_profit_threshold=float(theta),
            trade_capital=float(capital),
            pair_universe=universe,
            staleness_threshold_seconds=float(staleness),
        )
        for theta in theta_values
        for capital in trade_capital_values
        for universe in map(canonical_universe, pair_universes)
        for staleness in staleness_threshold_values
    ]
 def _filter_events_for_parameters(
    events: Sequence[ReplayBookEvent],
    *,
    pair_universe: set[str],
    staleness_threshold_seconds: float,
 ) -> list[ReplayBookEvent]:
    if staleness_threshold_seconds <= 0.0:
        raise ValueError("staleness_threshold_seconds must be > 0")
    filtered: list[ReplayBookEvent] = []
    last_seen_by_symbol: dict[str, datetime] = {}
    for event in events:
        symbol = event.symbol.upper()
        if symbol not in pair_universe:
            continue
        previous = last_seen_by_symbol.get(symbol)
        last_seen_by_symbol[symbol] = event.occurred_at
        if previous is None:
            filtered.append(event)
            continue
        gap_seconds = (event.occurred_at - previous).total_seconds()
        if gap_seconds <= staleness_threshold_seconds:
            filtered.append(event)
    return filtered
 def _restrict_cycles_by_pair(
    cycles_by_pair: Mapping[str, list[TriangularCycle]],
    *,
    pair_universe: set[str],
 ) -> dict[str, list[TriangularCycle]]:
    restricted: dict[str, list[TriangularCycle]] = {}
    for pair_symbol, cycles in cycles_by_pair.items():
        normalized_pair = pair_symbol.upper()
        if normalized_pair not in pair_universe:
            continue
        kept = [cycle for cycle in cycles if all(
            pair.upper() in pair_universe for pair in cycle.pairs)]
        if kept:
            restricted[normalized_pair] = kept
    return restricted
 def _score_report(report: BacktestReport) -> float:
    win_rate_bonus = (report.win_rate or 0.0) * 100.0
    fill_rate_bonus = (report.fill_rate or 0.0) * 50.0
    return report.realized_pnl_usd + win_rate_bonus + fill_rate_bonus - report.max_drawdown_usd
 def _safe_ratio(numerator: float, denominator: float) -> float:
    if denominator <= 0.0:
        return 0.0 if numerator <= 0.0 else 1.0
    return max(0.0, numerator / denominator)
 def _evaluate_promotion(
    *,
    result: SweepResult,
    criteria: PromotionCriteria,
 ) -> tuple[bool, tuple[str, ...]]:
    reasons: list[str] = []
    test = result.test_report
    if test.realized_pnl_usd < criteria.min_test_realized_pnl_usd:
        reasons.append(
            "test_realized_pnl_below_threshold"
        )
    if (test.win_rate or 0.0) < criteria.min_test_win_rate:
        reasons.append("test_win_rate_below_threshold")
    if (test.fill_rate or 0.0) < criteria.min_test_fill_rate:
        reasons.append("test_fill_rate_below_threshold")
    if test.max_drawdown_usd > criteria.max_test_drawdown_usd:
        reasons.append("test_drawdown_above_threshold")
    if result.generalization_gap_ratio > criteria.max_generalization_gap_ratio:
        reasons.append("generalization_gap_above_threshold")
    return (not reasons), tuple(reasons)
 def _run_backtest(
    *,
    events: Sequence[ReplayBookEvent],
    cycles_by_pair: Mapping[str, list[TriangularCycle]],
    available_pairs: Sequence[str],
    config: BacktestConfig,
    starting_balances: Mapping[str, float],
 ) -> BacktestReport:
    """Replay events through a fresh BacktestReplayEngine and return its report.

    The engine start time is the first event's ``occurred_at``, or the
    current UTC time when there are no events. The engine's ``run``
    coroutine is driven to completion with ``asyncio.run``.
    """
    if events:
        started_at = events[0].occurred_at
    else:
        started_at = datetime.now(UTC)
    replay_engine = BacktestReplayEngine(
        cycles_by_pair=cycles_by_pair,
        available_pairs=available_pairs,
        config=config,
        started_at=started_at,
    )
    replay = replay_engine.run(events, starting_balances=starting_balances)
    return asyncio.run(replay)
 def run_parameter_search(
    *,
    events: Sequence[ReplayBookEvent],
    cycles_by_pair: Mapping[str, list[TriangularCycle]],
    parameter_grid: Sequence[SweepParameters],
    starting_balances: Mapping[str, float],
    train_ratio: float,
    promotion_criteria: PromotionCriteria | None = None,
    max_concurrent_trades: int = 1,
    max_depth_levels: int = 10,
    quote_asset: str = "USD",
 ) -> SweepArtifacts:
    """Backtest every parameter combination on a train and a test window.

    The events are split once with ``split_events_time_windows``. For each
    combination the two windows are filtered by pair universe and
    staleness threshold, the cycle index is restricted to the universe,
    and one backtest is run per window. Scores, the generalization gap and
    the promotion verdict are then attached to the result.

    Combinations are skipped (no result is recorded) when filtering leaves
    either window empty or no cycle lies inside the pair universe.

    Args:
        events: Replay events to split into train and test windows.
        cycles_by_pair: Cycle index covering the full symbol set.
        parameter_grid: Combinations to evaluate.
        starting_balances: Balances each backtest starts from.
        train_ratio: Fraction of events assigned to the train window.
        promotion_criteria: Promotion thresholds; defaults to
            ``PromotionCriteria()``.
        max_concurrent_trades: Forwarded to ``BacktestConfig``.
        max_depth_levels: Forwarded to ``BacktestConfig``.
        quote_asset: Forwarded to ``BacktestConfig``.

    Returns:
        Artifacts holding all results and the promotion-ready subset, both
        sorted by test score descending, plus the first/last timestamps of
        the unfiltered train and test windows.

    Raises:
        ValueError: Propagated from the split or the event filter for an
            invalid ``train_ratio``, too few events, or a non-positive
            staleness threshold.
    """
    # Imported locally because the module only imports `dataclass`.
    from dataclasses import replace

    criteria = promotion_criteria or PromotionCriteria()
    train_events, test_events = split_events_time_windows(
        events, train_ratio=train_ratio)
    results: list[SweepResult] = []
    for parameters in parameter_grid:
        allowed_pairs = set(parameters.pair_universe)
        filtered_train = _filter_events_for_parameters(
            train_events,
            pair_universe=allowed_pairs,
            staleness_threshold_seconds=parameters.staleness_threshold_seconds,
        )
        filtered_test = _filter_events_for_parameters(
            test_events,
            pair_universe=allowed_pairs,
            staleness_threshold_seconds=parameters.staleness_threshold_seconds,
        )
        if not filtered_train or not filtered_test:
            continue
        restricted_cycles = _restrict_cycles_by_pair(
            cycles_by_pair,
            pair_universe=allowed_pairs,
        )
        if not restricted_cycles:
            continue
        config = BacktestConfig(
            min_profit_threshold=parameters.min_profit_threshold,
            trade_capital=parameters.trade_capital,
            max_concurrent_trades=max_concurrent_trades,
            max_depth_levels=max_depth_levels,
            quote_asset=quote_asset,
        )
        # Both runs use the same pair list; sort it once.
        available_pairs = sorted(allowed_pairs)
        train_report = _run_backtest(
            events=filtered_train,
            cycles_by_pair=restricted_cycles,
            available_pairs=available_pairs,
            config=config,
            starting_balances=starting_balances,
        )
        test_report = _run_backtest(
            events=filtered_test,
            cycles_by_pair=restricted_cycles,
            available_pairs=available_pairs,
            config=config,
            starting_balances=starting_balances,
        )
        train_score = _score_report(train_report)
        test_score = _score_report(test_report)
        # Only a drop from train to test counts as a gap; a test score
        # that beats the train score yields a gap of zero.
        score_drop = max(0.0, train_score - test_score)
        generalization_gap_ratio = _safe_ratio(score_drop, abs(train_score))
        candidate = SweepResult(
            parameters=parameters,
            train_report=train_report,
            test_report=test_report,
            train_score=train_score,
            test_score=test_score,
            generalization_gap_ratio=generalization_gap_ratio,
            overfit_detected=(
                generalization_gap_ratio
                > criteria.max_generalization_gap_ratio
            ),
            promotion_ready=False,
            promotion_reasons=(),
            train_event_count=len(filtered_train),
            test_event_count=len(filtered_test),
        )
        promotion_ready, promotion_reasons = _evaluate_promotion(
            result=candidate, criteria=criteria)
        # SweepResult is frozen, so derive the final record with replace()
        # instead of re-listing every field by hand.
        results.append(
            replace(
                candidate,
                promotion_ready=promotion_ready,
                promotion_reasons=promotion_reasons,
            )
        )
    results.sort(key=lambda item: item.test_score, reverse=True)
    # The sort is stable, so filtering the ranked list yields the same
    # order as ranking the promoted results separately.
    promoted = [item for item in results if item.promotion_ready]
    train_window: tuple[datetime, datetime] | None = None
    test_window: tuple[datetime, datetime] | None = None
    if train_events:
        train_window = (train_events[0].occurred_at,
                        train_events[-1].occurred_at)
    if test_events:
        test_window = (test_events[0].occurred_at, test_events[-1].occurred_at)
    return SweepArtifacts(
        results=tuple(results),
        promoted=tuple(promoted),
        train_window=train_window,
        test_window=test_window,
    )
 def _report_to_dict(report: BacktestReport) -> dict[str, object]:
    return {
        "started_at": report.started_at.isoformat(),
        "finished_at": report.finished_at.isoformat(),
        "processed_events": report.processed_events,
        "opportunities_seen": report.opportunities_seen,
        "trades_executed": report.trades_executed,
        "win_rate": report.win_rate,
        "fill_rate": report.fill_rate,
        "realized_pnl_usd": report.realized_pnl_usd,
        "max_drawdown_usd": report.max_drawdown_usd,
        "miss_reasons": dict(report.miss_reasons),
        "execution_latency_p50_ms": report.execution_latency_p50_ms,
        "execution_latency_p95_ms": report.execution_latency_p95_ms,
        "execution_latency_p99_ms": report.execution_latency_p99_ms,
    }
 def persist_sweep_results(path: Path, artifacts: SweepArtifacts) -> None:
    """Write the sweep artifacts to ``path`` as indented, key-sorted JSON.

    The payload holds a generation timestamp, the train and test windows
    (or ``None`` for a missing window) and one entry per result with its
    parameters, both reports, scores and promotion verdict. Missing parent
    directories are created.
    """
    generated_at = datetime.now(UTC).isoformat()

    def window_to_dict(
        window: tuple[datetime, datetime] | None,
    ) -> dict[str, str] | None:
        if window is None:
            return None
        return {
            "started_at": window[0].isoformat(),
            "finished_at": window[1].isoformat(),
        }

    def result_to_dict(result: SweepResult) -> dict[str, object]:
        parameters = result.parameters
        return {
            "parameters": {
                "min_profit_threshold": parameters.min_profit_threshold,
                "trade_capital": parameters.trade_capital,
                "pair_universe": list(parameters.pair_universe),
                "staleness_threshold_seconds": parameters.staleness_threshold_seconds,
            },
            "train_report": _report_to_dict(result.train_report),
            "test_report": _report_to_dict(result.test_report),
            "train_score": result.train_score,
            "test_score": result.test_score,
            "generalization_gap_ratio": result.generalization_gap_ratio,
            "overfit_detected": result.overfit_detected,
            "promotion_ready": result.promotion_ready,
            "promotion_reasons": list(result.promotion_reasons),
            "train_event_count": result.train_event_count,
            "test_event_count": result.test_event_count,
        }

    payload = {
        "generated_at": generated_at,
        "train_window": window_to_dict(artifacts.train_window),
        "test_window": window_to_dict(artifacts.test_window),
        "results": [result_to_dict(entry) for entry in artifacts.results],
    }
    path.parent.mkdir(parents=True, exist_ok=True)
    encoded = orjson.dumps(
        payload, option=orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS)
    path.write_bytes(encoded)
@@ -0,0 +1,102 @@
 from __future__ import annotations
 from datetime import UTC, datetime, timedelta
 from arbitrade.backtesting.replay import ReplayBookEvent
 from arbitrade.backtesting.sweep import (
    PromotionCriteria,
    SweepResult,
    build_parameter_grid,
    run_parameter_search,
    split_events_time_windows,
 )
 from arbitrade.detection.graph import CurrencyGraph
 from arbitrade.exchange.models import BookLevel
 def _build_cycles() -> dict[str, list]:
    """Return the cycle index of the three-pair fixture graph."""
    fixture_pairs = (
        ("USD", "BTC", "BTC/USD"),
        ("BTC", "ETH", "ETH/BTC"),
        ("ETH", "USD", "ETH/USD"),
    )
    graph = CurrencyGraph()
    for first, second, symbol in fixture_pairs:
        graph.add_pair(first, second, symbol)
    cycles = graph.triangular_cycles()
    return graph.index_cycles_by_pair(cycles)
 def _events() -> list[ReplayBookEvent]:
    """Build 12 one-second ticks, each with one book event per fixture pair.

    Every event carries a single bid and a single ask level of volume 10;
    within a tick the order is BTC/USD, ETH/BTC, ETH/USD.
    """
    # (symbol, bid price, ask price) quoted identically on every tick.
    quotes = (
        ("BTC/USD", 99.5, 100.0),
        ("ETH/BTC", 0.051, 0.050),
        ("ETH/USD", 110.0, 110.5),
    )
    start = datetime(2026, 6, 1, 12, 0, tzinfo=UTC)
    return [
        ReplayBookEvent(
            occurred_at=start + timedelta(seconds=offset),
            symbol=symbol,
            bids=(BookLevel(price=bid, volume=10.0),),
            asks=(BookLevel(price=ask, volume=10.0),),
        )
        for offset in range(12)
        for symbol, bid, ask in quotes
    ]
 def test_split_events_time_windows_returns_non_empty_train_and_test() -> None:
    """Both windows are non-empty and the train window ends no later than the test window starts."""
    in_sample, out_of_sample = split_events_time_windows(
        _events(), train_ratio=0.7)
    assert len(in_sample) > 0
    assert len(out_of_sample) > 0
    last_train_tick = in_sample[-1].occurred_at
    first_test_tick = out_of_sample[0].occurred_at
    assert last_train_tick <= first_test_tick
 def test_build_parameter_grid_expands_combinations() -> None:
    """Two thetas x one capital x one universe x two thresholds gives four grid points."""
    axes = {
        "theta_values": [0.0005, 0.001],
        "trade_capital_values": [100.0],
        "pair_universes": [["BTC/USD", "ETH/BTC", "ETH/USD"]],
        "staleness_threshold_values": [3.0, 5.0],
    }
    combinations = build_parameter_grid(**axes)
    assert len(combinations) == 4
 def test_run_parameter_search_produces_ranked_results_with_overfit_guard() -> None:
    """A small sweep yields results ranked by test score with populated guard fields."""
    # Thresholds loose enough that only the generalization gap can block
    # promotion.
    lenient_criteria = PromotionCriteria(
        min_test_realized_pnl_usd=-1000.0,
        min_test_win_rate=0.0,
        min_test_fill_rate=0.0,
        max_test_drawdown_usd=1_000_000.0,
        max_generalization_gap_ratio=0.9,
    )
    grid = build_parameter_grid(
        theta_values=[0.0005, 0.001],
        trade_capital_values=[75.0, 100.0],
        pair_universes=[["BTC/USD", "ETH/BTC", "ETH/USD"]],
        staleness_threshold_values=[5.0],
    )
    artifacts = run_parameter_search(
        events=_events(),
        cycles_by_pair=_build_cycles(),
        parameter_grid=grid,
        starting_balances={"USD": 2000.0},
        train_ratio=0.7,
        promotion_criteria=lenient_criteria,
    )
    ranked = artifacts.results
    assert len(ranked) > 0
    assert ranked[0].test_score >= ranked[-1].test_score
    first: SweepResult = ranked[0]
    assert first.train_event_count > 0
    assert first.test_event_count > 0
    assert first.generalization_gap_ratio >= 0.0
    assert isinstance(first.promotion_ready, bool)