feat: Add backtesting parameter sweep support and related functionality
This commit is contained in:
@@ -18,6 +18,9 @@
|
||||
- Added synthetic latency profiler scenarios and CLI scripts for baseline generation and regression checks.
|
||||
- Added latency baseline/threshold artifacts and CI latency guardrail enforcement.
|
||||
- Added deterministic replay backtesting engine, CLI script, and unit coverage for JSONL event replay.
|
||||
- Added backtesting parameter sweep support (`scripts/backtest_sweep.py`) for theta, trade-capital, pair-universe, and staleness-threshold grid search.
|
||||
- Added persisted sweep artifacts with ranked in-sample/out-of-sample results and promotion-ready candidate reporting.
|
||||
- Added out-of-sample overfit guards via train/test time-window split and generalization-gap checks.
|
||||
- Added dashboard controls for tradable pair universe selection and strategy mode/parameter configuration.
|
||||
|
||||
### Changed
|
||||
|
||||
@@ -104,7 +104,7 @@ Minimum `.env` values:
|
||||
```env
|
||||
APP_ENV=dev
|
||||
APP_HOST=0.0.0.0
|
||||
APP_PORT=8000
|
||||
APP_PORT=9090
|
||||
LOG_LEVEL=INFO
|
||||
LOG_JSON=true
|
||||
DUCKDB_PATH=./data/arbitrade.duckdb
|
||||
@@ -132,8 +132,8 @@ python -m arbitrade.main
|
||||
|
||||
Health endpoints:
|
||||
|
||||
- HTML: `http://localhost:8000/`
|
||||
- JSON: `http://localhost:8000/health`
|
||||
- HTML: `http://localhost:9090/`
|
||||
- JSON: `http://localhost:9090/health`
|
||||
|
||||
## Database
|
||||
|
||||
@@ -283,12 +283,12 @@ Set these in Coolify application settings:
|
||||
- Build Command: leave empty.
|
||||
- Install Command: leave empty.
|
||||
- Start Command: leave empty unless you explicitly want to override the image default.
|
||||
- Port: `8000`
|
||||
- Port: `9090` (Coolify uses `8000` internally)
|
||||
|
||||
### 3) Configure health check and networking
|
||||
|
||||
- Health Check Path: `/health`
|
||||
- Exposed Port: `8000`
|
||||
- Exposed Port: `9090`
|
||||
- Use Coolify-generated domain or attach your own domain.
|
||||
|
||||
### 4) Configure persistent storage
|
||||
@@ -305,7 +305,7 @@ Add runtime environment variables in Coolify (UI: Environment Variables):
|
||||
|
||||
- `APP_ENV=prod`
|
||||
- `APP_HOST=0.0.0.0`
|
||||
- `APP_PORT=8000`
|
||||
- `APP_PORT=9090`
|
||||
- `DUCKDB_PATH=/app/data/arbitrade.duckdb`
|
||||
- `LOG_LEVEL=INFO`
|
||||
- `LOG_JSON=true`
|
||||
@@ -431,6 +431,12 @@ Run a deterministic replay backtest from a JSONL event stream:
|
||||
python scripts/backtest_replay.py --events path\to\replay.jsonl --starting-balances USD=1000.0
|
||||
```
|
||||
|
||||
Run parameter sweep with train/test split and promotion scoring:
|
||||
|
||||
```powershell
|
||||
python scripts/backtest_sweep.py --events path\to\replay.jsonl --starting-balances USD=1000.0 --output ops/backtesting/parameter_sweep_results.json
|
||||
```
|
||||
|
||||
Replay event format:
|
||||
|
||||
```json
|
||||
@@ -447,7 +453,9 @@ Notes:
|
||||
- Events are replayed in timestamp order.
|
||||
- The replay engine reuses the production detector, pre-trade validation, trade limits, and execution sequencer.
|
||||
- The simulated execution path applies configurable slippage and execution latency so reports include deterministic trade/miss statistics.
|
||||
Latency baseline and threshold artifacts:
|
||||
- Parameter sweep splits replay data into in-sample and out-of-sample windows, ranks configurations by out-of-sample score, and flags overfit via train/test generalization-gap checks.
|
||||
- Sweep output persists ranked combinations and promotion-ready candidates for paper-trading canary promotion decisions.
|
||||
- Latency baseline and threshold artifacts:
|
||||
|
||||
- `ops/performance/latency_baseline.json`
|
||||
- `ops/performance/latency_thresholds.json`
|
||||
|
||||
@@ -0,0 +1,151 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from collections.abc import Mapping, Sequence
|
||||
from pathlib import Path
|
||||
|
||||
from arbitrade.backtesting import load_replay_events
|
||||
from arbitrade.backtesting.sweep import (
|
||||
PromotionCriteria,
|
||||
SweepResult,
|
||||
build_parameter_grid,
|
||||
persist_sweep_results,
|
||||
run_parameter_search,
|
||||
)
|
||||
from arbitrade.detection.graph import CurrencyGraph, TriangularCycle
|
||||
|
||||
|
||||
def _parse_balances(raw: str) -> Mapping[str, float]:
|
||||
balances: dict[str, float] = {}
|
||||
for entry in raw.split(","):
|
||||
stripped = entry.strip()
|
||||
if not stripped:
|
||||
continue
|
||||
asset, value = stripped.split("=", 1)
|
||||
balances[asset.strip().upper()] = float(value)
|
||||
return balances
|
||||
|
||||
|
||||
def _parse_float_list(raw: str) -> list[float]:
|
||||
values = [item.strip() for item in raw.split(",") if item.strip()]
|
||||
if not values:
|
||||
raise ValueError("expected at least one numeric value")
|
||||
return [float(value) for value in values]
|
||||
|
||||
|
||||
def _parse_pair_universes(raw: str) -> list[tuple[str, ...]]:
|
||||
universes: list[tuple[str, ...]] = []
|
||||
for chunk in raw.split(";"):
|
||||
symbols = tuple(item.strip().upper()
|
||||
for item in chunk.split("|") if item.strip())
|
||||
if symbols:
|
||||
universes.append(symbols)
|
||||
if not universes:
|
||||
raise ValueError("at least one pair universe must be provided")
|
||||
return universes
|
||||
|
||||
|
||||
def _build_graph_from_symbols(symbols: Sequence[str]) -> dict[str, list[TriangularCycle]]:
    """Build a currency graph from ``BASE/QUOTE`` symbols and index its cycles.

    Symbols are upper-cased; any symbol without a ``/`` is skipped. The
    result is whatever ``CurrencyGraph.index_cycles_by_pair`` returns for
    the graph's triangular cycles.
    """
    currency_graph = CurrencyGraph()
    for raw_symbol in symbols:
        pair_name = raw_symbol.upper()
        # Split on the first "/" only, so the quote keeps any further slashes.
        base, slash, quote = pair_name.partition("/")
        if not slash:
            continue
        currency_graph.add_pair(base, quote, pair_name)

    return currency_graph.index_cycles_by_pair(currency_graph.triangular_cycles())
|
||||
|
||||
|
||||
def _print_top_results(results: Sequence[SweepResult], *, limit: int = 5) -> None:
|
||||
print(f"Top {min(limit, len(results))} result(s) by out-of-sample score:")
|
||||
for index, result in enumerate(results[:limit], start=1):
|
||||
print(
|
||||
"- "
|
||||
f"#{index} "
|
||||
f"theta={result.parameters.min_profit_threshold:.6f}, "
|
||||
f"capital={result.parameters.trade_capital:.2f}, "
|
||||
f"pairs={','.join(result.parameters.pair_universe)}, "
|
||||
f"staleness={result.parameters.staleness_threshold_seconds:.2f}s, "
|
||||
f"test_score={result.test_score:.4f}, "
|
||||
f"promotion_ready={result.promotion_ready}"
|
||||
)
|
||||
|
||||
|
||||
def main(argv: Sequence[str] | None = None) -> int:
    """Run the parameter sweep CLI and return a process exit code.

    Loads replay events, derives triangular cycles from the symbols seen in
    those events, builds the parameter grid from the comma/semicolon
    separated options, runs the search, persists the artifacts, and prints
    a short summary.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) lets argparse read ``sys.argv[1:]``, so the
            no-argument call from the ``__main__`` guard still works.

    Returns:
        ``0`` after results are written and the summary is printed.

    Raises:
        SystemExit: From argparse on bad options or ``--help``, or when no
            triangular cycle can be built from the replay events.
        ValueError: If a list-valued option cannot be parsed.
    """
    parser = argparse.ArgumentParser(
        description="Run backtesting parameter sweep with train/test split.")
    parser.add_argument("--events", type=Path, required=True)
    parser.add_argument("--starting-balances", type=str, default="USD=1000.0")
    parser.add_argument("--theta-values", type=str,
                        default="0.0003,0.0005,0.0008")
    parser.add_argument("--trade-capital-values",
                        type=str, default="50,100,150")
    parser.add_argument(
        "--pair-universes",
        type=str,
        default="BTC/USD|ETH/BTC|ETH/USD",
        help="Semicolon-separated universes, each with | delimited pairs",
    )
    parser.add_argument("--staleness-threshold-values",
                        type=str, default="3,5,8")
    parser.add_argument("--train-ratio", type=float, default=0.7)
    parser.add_argument("--output", type=Path,
                        default=Path("ops/backtesting/parameter_sweep_results.json"))

    # Promotion thresholds, forwarded unchanged into PromotionCriteria.
    parser.add_argument("--min-test-realized-pnl-usd", type=float, default=0.0)
    parser.add_argument("--min-test-win-rate", type=float, default=0.5)
    parser.add_argument("--min-test-fill-rate", type=float, default=0.9)
    parser.add_argument("--max-test-drawdown-usd", type=float, default=25.0)
    parser.add_argument("--max-generalization-gap-ratio",
                        type=float, default=0.5)

    args = parser.parse_args(argv)

    events = load_replay_events(args.events)
    # Cycles come from every symbol present in the replay data; each grid
    # point later narrows them to its own pair universe.
    symbols = sorted({event.symbol.upper() for event in events})
    cycles_by_pair = _build_graph_from_symbols(symbols)
    if not cycles_by_pair:
        raise SystemExit(
            "No triangular cycles found in supplied replay events")

    grid = build_parameter_grid(
        theta_values=_parse_float_list(args.theta_values),
        trade_capital_values=_parse_float_list(args.trade_capital_values),
        pair_universes=_parse_pair_universes(args.pair_universes),
        staleness_threshold_values=_parse_float_list(
            args.staleness_threshold_values),
    )

    artifacts = run_parameter_search(
        events=events,
        cycles_by_pair=cycles_by_pair,
        parameter_grid=grid,
        starting_balances=_parse_balances(args.starting_balances),
        train_ratio=args.train_ratio,
        promotion_criteria=PromotionCriteria(
            min_test_realized_pnl_usd=args.min_test_realized_pnl_usd,
            min_test_win_rate=args.min_test_win_rate,
            min_test_fill_rate=args.min_test_fill_rate,
            max_test_drawdown_usd=args.max_test_drawdown_usd,
            max_generalization_gap_ratio=args.max_generalization_gap_ratio,
        ),
    )

    persist_sweep_results(args.output, artifacts)

    print(f"Completed sweep combinations: {len(artifacts.results)}")
    print(f"Promotion-ready combinations: {len(artifacts.promoted)}")
    print(f"Results written: {args.output}")

    _print_top_results(artifacts.results)
    if artifacts.promoted:
        print("Promotion candidates (paper-trading canary):")
        _print_top_results(artifacts.promoted)

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|
||||
@@ -6,6 +6,16 @@ from arbitrade.backtesting.replay import (
|
||||
ReplayClock,
|
||||
load_replay_events,
|
||||
)
|
||||
from arbitrade.backtesting.sweep import (
|
||||
PromotionCriteria,
|
||||
SweepArtifacts,
|
||||
SweepParameters,
|
||||
SweepResult,
|
||||
build_parameter_grid,
|
||||
persist_sweep_results,
|
||||
run_parameter_search,
|
||||
split_events_time_windows,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"ReplayClock",
|
||||
@@ -14,4 +24,12 @@ __all__ = [
|
||||
"BacktestReport",
|
||||
"BacktestReplayEngine",
|
||||
"load_replay_events",
|
||||
"SweepParameters",
|
||||
"SweepResult",
|
||||
"SweepArtifacts",
|
||||
"PromotionCriteria",
|
||||
"split_events_time_windows",
|
||||
"build_parameter_grid",
|
||||
"run_parameter_search",
|
||||
"persist_sweep_results",
|
||||
]
|
||||
|
||||
@@ -0,0 +1,396 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from collections.abc import Mapping, Sequence
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
|
||||
import orjson
|
||||
|
||||
from arbitrade.backtesting.replay import (
|
||||
BacktestConfig,
|
||||
BacktestReplayEngine,
|
||||
BacktestReport,
|
||||
ReplayBookEvent,
|
||||
)
|
||||
from arbitrade.detection.graph import TriangularCycle
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class SweepParameters:
    """One combination of tunable settings evaluated by the parameter sweep."""

    # Forwarded to BacktestConfig.min_profit_threshold.
    min_profit_threshold: float
    # Forwarded to BacktestConfig.trade_capital.
    trade_capital: float
    # Pair symbols used to filter replay events and restrict cycles.
    pair_universe: tuple[str, ...]
    # Largest per-symbol gap between consecutive events that the event
    # filter still keeps.
    staleness_threshold_seconds: float
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class PromotionCriteria:
    """Thresholds a sweep result must satisfy to be marked promotion-ready.

    All test-side thresholds are compared against the out-of-sample (test)
    report; a result is promotion-ready only if no check fails.
    """

    # Test realized PnL below this value fails promotion.
    min_test_realized_pnl_usd: float = 0.0
    # Test win rate below this value fails (a missing win rate counts as 0.0).
    min_test_win_rate: float = 0.5
    # Test fill rate below this value fails (a missing fill rate counts as 0.0).
    min_test_fill_rate: float = 0.9
    # Test max drawdown above this value fails promotion.
    max_test_drawdown_usd: float = 25.0
    # Generalization gap above this value fails promotion and also sets
    # overfit_detected on the result.
    max_generalization_gap_ratio: float = 0.5
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class SweepResult:
    """Outcome of evaluating one SweepParameters combination on both windows."""

    # The parameter combination that was evaluated.
    parameters: SweepParameters
    # Backtest report for the in-sample (train) window.
    train_report: BacktestReport
    # Backtest report for the out-of-sample (test) window.
    test_report: BacktestReport
    # Composite score of the train report.
    train_score: float
    # Composite score of the test report; results are ranked by this value.
    test_score: float
    # Drop from train score to test score (never negative) relative to the
    # absolute train score.
    generalization_gap_ratio: float
    # True when the gap ratio exceeds the configured maximum.
    overfit_detected: bool
    # True when no promotion criterion failed.
    promotion_ready: bool
    # Identifiers of the failed promotion checks; empty when promotion-ready.
    promotion_reasons: tuple[str, ...]
    # Number of train events left after pair/staleness filtering.
    train_event_count: int
    # Number of test events left after pair/staleness filtering.
    test_event_count: int
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class SweepArtifacts:
    """Complete output of a parameter search, ready to be persisted."""

    # All evaluated combinations, sorted by test score, highest first.
    results: tuple[SweepResult, ...]
    # The promotion-ready subset of results, in the same order.
    promoted: tuple[SweepResult, ...]
    # (first, last) event timestamps of the unfiltered train split, or None.
    train_window: tuple[datetime, datetime] | None
    # (first, last) event timestamps of the unfiltered test split, or None.
    test_window: tuple[datetime, datetime] | None
|
||||
|
||||
|
||||
def split_events_time_windows(
    events: Sequence[ReplayBookEvent],
    *,
    train_ratio: float,
) -> tuple[list[ReplayBookEvent], list[ReplayBookEvent]]:
    """Split events into a leading train window and a trailing test window.

    The split is positional: the first ``int(len(events) * train_ratio)``
    events form the train window, clamped so both windows hold at least one
    event. Events are not sorted here.
    NOTE(review): presumably the caller supplies events in timestamp
    order -- confirm against ``load_replay_events``.

    Args:
        events: Replay events to split.
        train_ratio: Fraction of events for the train window, strictly
            between 0 and 1.

    Returns:
        ``(train_events, test_events)`` as new lists.

    Raises:
        ValueError: If ``train_ratio`` is not strictly between 0 and 1
            (including NaN), or fewer than two events are given.
    """
    # A chained comparison is False for NaN, so NaN is rejected here with
    # the documented message instead of failing later inside int().
    if not 0.0 < train_ratio < 1.0:
        raise ValueError("train_ratio must be between 0 and 1")
    if len(events) < 2:
        raise ValueError("at least two events are required for time split")

    # Clamp to [1, len - 1] so neither window can be empty.
    split_index = max(1, min(len(events) - 1, int(len(events) * train_ratio)))
    return list(events[:split_index]), list(events[split_index:])
|
||||
|
||||
|
||||
def build_parameter_grid(
    *,
    theta_values: Sequence[float],
    trade_capital_values: Sequence[float],
    pair_universes: Sequence[Sequence[str]],
    staleness_threshold_values: Sequence[float],
) -> list[SweepParameters]:
    """Expand the four option lists into every parameter combination.

    Combinations are produced with theta as the outermost dimension, then
    trade capital, then pair universe, then staleness threshold. Each pair
    universe is upper-cased, de-duplicated, and sorted.

    Raises:
        ValueError: If any of the four inputs is empty.
    """
    emptiness_checks = (
        (theta_values, "theta_values must not be empty"),
        (trade_capital_values, "trade_capital_values must not be empty"),
        (pair_universes, "pair_universes must not be empty"),
        (staleness_threshold_values, "staleness_threshold_values must not be empty"),
    )
    for values, message in emptiness_checks:
        if not values:
            raise ValueError(message)

    universes = [
        tuple(sorted({symbol.upper() for symbol in universe}))
        for universe in pair_universes
    ]
    return [
        SweepParameters(
            min_profit_threshold=float(theta),
            trade_capital=float(capital),
            pair_universe=universe,
            staleness_threshold_seconds=float(staleness),
        )
        for theta in theta_values
        for capital in trade_capital_values
        for universe in universes
        for staleness in staleness_threshold_values
    ]
|
||||
|
||||
|
||||
def _filter_events_for_parameters(
|
||||
events: Sequence[ReplayBookEvent],
|
||||
*,
|
||||
pair_universe: set[str],
|
||||
staleness_threshold_seconds: float,
|
||||
) -> list[ReplayBookEvent]:
|
||||
if staleness_threshold_seconds <= 0.0:
|
||||
raise ValueError("staleness_threshold_seconds must be > 0")
|
||||
|
||||
filtered: list[ReplayBookEvent] = []
|
||||
last_seen_by_symbol: dict[str, datetime] = {}
|
||||
|
||||
for event in events:
|
||||
symbol = event.symbol.upper()
|
||||
if symbol not in pair_universe:
|
||||
continue
|
||||
|
||||
previous = last_seen_by_symbol.get(symbol)
|
||||
last_seen_by_symbol[symbol] = event.occurred_at
|
||||
if previous is None:
|
||||
filtered.append(event)
|
||||
continue
|
||||
|
||||
gap_seconds = (event.occurred_at - previous).total_seconds()
|
||||
if gap_seconds <= staleness_threshold_seconds:
|
||||
filtered.append(event)
|
||||
|
||||
return filtered
|
||||
|
||||
|
||||
def _restrict_cycles_by_pair(
|
||||
cycles_by_pair: Mapping[str, list[TriangularCycle]],
|
||||
*,
|
||||
pair_universe: set[str],
|
||||
) -> dict[str, list[TriangularCycle]]:
|
||||
restricted: dict[str, list[TriangularCycle]] = {}
|
||||
for pair_symbol, cycles in cycles_by_pair.items():
|
||||
normalized_pair = pair_symbol.upper()
|
||||
if normalized_pair not in pair_universe:
|
||||
continue
|
||||
|
||||
kept = [cycle for cycle in cycles if all(
|
||||
pair.upper() in pair_universe for pair in cycle.pairs)]
|
||||
if kept:
|
||||
restricted[normalized_pair] = kept
|
||||
return restricted
|
||||
|
||||
|
||||
def _score_report(report: BacktestReport) -> float:
|
||||
win_rate_bonus = (report.win_rate or 0.0) * 100.0
|
||||
fill_rate_bonus = (report.fill_rate or 0.0) * 50.0
|
||||
return report.realized_pnl_usd + win_rate_bonus + fill_rate_bonus - report.max_drawdown_usd
|
||||
|
||||
|
||||
def _safe_ratio(numerator: float, denominator: float) -> float:
|
||||
if denominator <= 0.0:
|
||||
return 0.0 if numerator <= 0.0 else 1.0
|
||||
return max(0.0, numerator / denominator)
|
||||
|
||||
|
||||
def _evaluate_promotion(
|
||||
*,
|
||||
result: SweepResult,
|
||||
criteria: PromotionCriteria,
|
||||
) -> tuple[bool, tuple[str, ...]]:
|
||||
reasons: list[str] = []
|
||||
test = result.test_report
|
||||
|
||||
if test.realized_pnl_usd < criteria.min_test_realized_pnl_usd:
|
||||
reasons.append(
|
||||
"test_realized_pnl_below_threshold"
|
||||
)
|
||||
if (test.win_rate or 0.0) < criteria.min_test_win_rate:
|
||||
reasons.append("test_win_rate_below_threshold")
|
||||
if (test.fill_rate or 0.0) < criteria.min_test_fill_rate:
|
||||
reasons.append("test_fill_rate_below_threshold")
|
||||
if test.max_drawdown_usd > criteria.max_test_drawdown_usd:
|
||||
reasons.append("test_drawdown_above_threshold")
|
||||
if result.generalization_gap_ratio > criteria.max_generalization_gap_ratio:
|
||||
reasons.append("generalization_gap_above_threshold")
|
||||
|
||||
return (not reasons), tuple(reasons)
|
||||
|
||||
|
||||
def _run_backtest(
    *,
    events: Sequence[ReplayBookEvent],
    cycles_by_pair: Mapping[str, list[TriangularCycle]],
    available_pairs: Sequence[str],
    config: BacktestConfig,
    starting_balances: Mapping[str, float],
) -> BacktestReport:
    """Run one replay backtest to completion and return its report.

    The engine start time is the first event's timestamp, or the current
    UTC time when ``events`` is empty. The engine's coroutine is driven
    with ``asyncio.run``.
    """
    if events:
        replay_start = events[0].occurred_at
    else:
        replay_start = datetime.now(UTC)

    replay_engine = BacktestReplayEngine(
        cycles_by_pair=cycles_by_pair,
        available_pairs=available_pairs,
        config=config,
        started_at=replay_start,
    )
    replay = replay_engine.run(events, starting_balances=starting_balances)
    return asyncio.run(replay)
|
||||
|
||||
|
||||
def run_parameter_search(
    *,
    events: Sequence[ReplayBookEvent],
    cycles_by_pair: Mapping[str, list[TriangularCycle]],
    parameter_grid: Sequence[SweepParameters],
    starting_balances: Mapping[str, float],
    train_ratio: float,
    promotion_criteria: PromotionCriteria | None = None,
    max_concurrent_trades: int = 1,
    max_depth_levels: int = 10,
    quote_asset: str = "USD",
) -> SweepArtifacts:
    """Backtest every grid combination on a train and a test window.

    Events are split once into train/test windows. For each parameter
    combination both windows are filtered by pair universe and staleness
    threshold, the cycles are restricted to the universe, and a backtest is
    run on each window. Each result is scored, checked for overfit via the
    generalization gap, and evaluated against the promotion criteria.

    Combinations are skipped without a result when either filtered window
    is empty or no cycle survives the universe restriction.

    Args:
        events: Replay events to split and replay.
        cycles_by_pair: Pair-to-cycles index covering all candidate pairs.
        parameter_grid: Combinations to evaluate.
        starting_balances: Starting balances passed to every backtest run.
        train_ratio: Fraction of events used for the train window.
        promotion_criteria: Promotion thresholds; defaults to
            ``PromotionCriteria()`` when omitted.
        max_concurrent_trades: Forwarded to ``BacktestConfig``.
        max_depth_levels: Forwarded to ``BacktestConfig``.
        quote_asset: Forwarded to ``BacktestConfig``.

    Returns:
        ``SweepArtifacts`` with all results and the promotion-ready subset,
        both sorted by test score (highest first), plus the first/last
        timestamps of the unfiltered train and test windows.

    Raises:
        ValueError: Propagated from the event split or the event filter on
            invalid ``train_ratio``, too few events, or a non-positive
            staleness threshold.
    """
    # Local import: the module-level import only brings in ``dataclass``.
    from dataclasses import replace

    criteria = promotion_criteria or PromotionCriteria()
    train_events, test_events = split_events_time_windows(
        events, train_ratio=train_ratio)

    results: list[SweepResult] = []
    promoted: list[SweepResult] = []

    for parameters in parameter_grid:
        allowed_pairs = set(parameters.pair_universe)
        filtered_train = _filter_events_for_parameters(
            train_events,
            pair_universe=allowed_pairs,
            staleness_threshold_seconds=parameters.staleness_threshold_seconds,
        )
        filtered_test = _filter_events_for_parameters(
            test_events,
            pair_universe=allowed_pairs,
            staleness_threshold_seconds=parameters.staleness_threshold_seconds,
        )

        # Nothing to replay on one side: this combination cannot be scored.
        if not filtered_train or not filtered_test:
            continue

        restricted_cycles = _restrict_cycles_by_pair(
            cycles_by_pair,
            pair_universe=allowed_pairs,
        )
        if not restricted_cycles:
            continue

        config = BacktestConfig(
            min_profit_threshold=parameters.min_profit_threshold,
            trade_capital=parameters.trade_capital,
            max_concurrent_trades=max_concurrent_trades,
            max_depth_levels=max_depth_levels,
            quote_asset=quote_asset,
        )

        train_report = _run_backtest(
            events=filtered_train,
            cycles_by_pair=restricted_cycles,
            available_pairs=sorted(allowed_pairs),
            config=config,
            starting_balances=starting_balances,
        )
        test_report = _run_backtest(
            events=filtered_test,
            cycles_by_pair=restricted_cycles,
            available_pairs=sorted(allowed_pairs),
            config=config,
            starting_balances=starting_balances,
        )

        train_score = _score_report(train_report)
        test_score = _score_report(test_report)
        # Only a drop from train to test counts as a gap; a better test
        # score yields a gap of zero.
        score_drop = max(0.0, train_score - test_score)
        generalization_gap_ratio = _safe_ratio(score_drop, abs(train_score))
        overfit_detected = generalization_gap_ratio > criteria.max_generalization_gap_ratio

        # _evaluate_promotion needs a SweepResult, so build a provisional
        # one and then fill in the promotion verdict.
        provisional = SweepResult(
            parameters=parameters,
            train_report=train_report,
            test_report=test_report,
            train_score=train_score,
            test_score=test_score,
            generalization_gap_ratio=generalization_gap_ratio,
            overfit_detected=overfit_detected,
            promotion_ready=False,
            promotion_reasons=(),
            train_event_count=len(filtered_train),
            test_event_count=len(filtered_test),
        )
        promotion_ready, promotion_reasons = _evaluate_promotion(
            result=provisional, criteria=criteria)
        # replace() copies every other field, so new SweepResult fields
        # cannot be forgotten here.
        completed_result = replace(
            provisional,
            promotion_ready=promotion_ready,
            promotion_reasons=promotion_reasons,
        )

        results.append(completed_result)
        if completed_result.promotion_ready:
            promoted.append(completed_result)

    results.sort(key=lambda item: item.test_score, reverse=True)
    promoted.sort(key=lambda item: item.test_score, reverse=True)

    train_window: tuple[datetime, datetime] | None = None
    test_window: tuple[datetime, datetime] | None = None
    if train_events:
        train_window = (train_events[0].occurred_at,
                        train_events[-1].occurred_at)
    if test_events:
        test_window = (test_events[0].occurred_at, test_events[-1].occurred_at)

    return SweepArtifacts(
        results=tuple(results),
        promoted=tuple(promoted),
        train_window=train_window,
        test_window=test_window,
    )
|
||||
|
||||
|
||||
def _report_to_dict(report: BacktestReport) -> dict[str, object]:
|
||||
return {
|
||||
"started_at": report.started_at.isoformat(),
|
||||
"finished_at": report.finished_at.isoformat(),
|
||||
"processed_events": report.processed_events,
|
||||
"opportunities_seen": report.opportunities_seen,
|
||||
"trades_executed": report.trades_executed,
|
||||
"win_rate": report.win_rate,
|
||||
"fill_rate": report.fill_rate,
|
||||
"realized_pnl_usd": report.realized_pnl_usd,
|
||||
"max_drawdown_usd": report.max_drawdown_usd,
|
||||
"miss_reasons": dict(report.miss_reasons),
|
||||
"execution_latency_p50_ms": report.execution_latency_p50_ms,
|
||||
"execution_latency_p95_ms": report.execution_latency_p95_ms,
|
||||
"execution_latency_p99_ms": report.execution_latency_p99_ms,
|
||||
}
|
||||
|
||||
|
||||
def persist_sweep_results(path: Path, artifacts: SweepArtifacts) -> None:
    """Write the sweep artifacts to ``path`` as indented, key-sorted JSON.

    The payload holds a generation timestamp, the train/test windows (or
    ``None``), and one entry per result in ``artifacts.results``. Missing
    parent directories are created.
    """

    def window_to_dict(window: tuple[datetime, datetime] | None) -> dict[str, str] | None:
        if window is None:
            return None
        first, last = window[0], window[1]
        return {
            "started_at": first.isoformat(),
            "finished_at": last.isoformat(),
        }

    serialized_results = []
    for result in artifacts.results:
        params = result.parameters
        serialized_results.append(
            {
                "parameters": {
                    "min_profit_threshold": params.min_profit_threshold,
                    "trade_capital": params.trade_capital,
                    "pair_universe": list(params.pair_universe),
                    "staleness_threshold_seconds": params.staleness_threshold_seconds,
                },
                "train_report": _report_to_dict(result.train_report),
                "test_report": _report_to_dict(result.test_report),
                "train_score": result.train_score,
                "test_score": result.test_score,
                "generalization_gap_ratio": result.generalization_gap_ratio,
                "overfit_detected": result.overfit_detected,
                "promotion_ready": result.promotion_ready,
                "promotion_reasons": list(result.promotion_reasons),
                "train_event_count": result.train_event_count,
                "test_event_count": result.test_event_count,
            }
        )

    payload = {
        "generated_at": datetime.now(UTC).isoformat(),
        "train_window": window_to_dict(artifacts.train_window),
        "test_window": window_to_dict(artifacts.test_window),
        "results": serialized_results,
    }

    encoded = orjson.dumps(
        payload, option=orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS)
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_bytes(encoded)
|
||||
@@ -0,0 +1,102 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime, timedelta
|
||||
|
||||
from arbitrade.backtesting.replay import ReplayBookEvent
|
||||
from arbitrade.backtesting.sweep import (
|
||||
PromotionCriteria,
|
||||
SweepResult,
|
||||
build_parameter_grid,
|
||||
run_parameter_search,
|
||||
split_events_time_windows,
|
||||
)
|
||||
from arbitrade.detection.graph import CurrencyGraph
|
||||
from arbitrade.exchange.models import BookLevel
|
||||
|
||||
|
||||
def _build_cycles() -> dict[str, list]:
    """Return the cycle index for a USD/BTC/ETH triangle used by the tests."""
    currency_graph = CurrencyGraph()
    triangle = (
        ("USD", "BTC", "BTC/USD"),
        ("BTC", "ETH", "ETH/BTC"),
        ("ETH", "USD", "ETH/USD"),
    )
    for first, second, symbol in triangle:
        currency_graph.add_pair(first, second, symbol)
    found_cycles = currency_graph.triangular_cycles()
    return currency_graph.index_cycles_by_pair(found_cycles)
|
||||
|
||||
|
||||
def _events() -> list[ReplayBookEvent]:
    """Build 12 one-second ticks, each with one book event per pair.

    Every tick emits BTC/USD, ETH/BTC, and ETH/USD (in that order) with
    constant single-level books of volume 10.
    """
    start = datetime(2026, 6, 1, 12, 0, tzinfo=UTC)
    # (symbol, best bid, best ask) repeated unchanged on every tick.
    quotes = (
        ("BTC/USD", 99.5, 100.0),
        ("ETH/BTC", 0.051, 0.050),
        ("ETH/USD", 110.0, 110.5),
    )
    generated: list[ReplayBookEvent] = []
    for offset in range(12):
        tick = start + timedelta(seconds=offset)
        generated += [
            ReplayBookEvent(
                occurred_at=tick,
                symbol=symbol,
                bids=(BookLevel(price=bid, volume=10.0),),
                asks=(BookLevel(price=ask, volume=10.0),),
            )
            for symbol, bid, ask in quotes
        ]
    return generated
|
||||
|
||||
|
||||
def test_split_events_time_windows_returns_non_empty_train_and_test() -> None:
    """Both windows are non-empty and train does not end after test starts."""
    in_sample, out_of_sample = split_events_time_windows(
        _events(), train_ratio=0.7)

    assert in_sample
    assert out_of_sample
    last_train_time = in_sample[-1].occurred_at
    first_test_time = out_of_sample[0].occurred_at
    assert last_train_time <= first_test_time
|
||||
|
||||
|
||||
def test_build_parameter_grid_expands_combinations() -> None:
    """Two thetas x one capital x one universe x two thresholds gives 4 entries."""
    universe = ["BTC/USD", "ETH/BTC", "ETH/USD"]
    combinations = build_parameter_grid(
        theta_values=[0.0005, 0.001],
        trade_capital_values=[100.0],
        pair_universes=[universe],
        staleness_threshold_values=[3.0, 5.0],
    )

    assert len(combinations) == 4
|
||||
|
||||
|
||||
def test_run_parameter_search_produces_ranked_results_with_overfit_guard() -> None:
    """A small sweep yields ranked results with populated counts and gap."""
    grid = build_parameter_grid(
        theta_values=[0.0005, 0.001],
        trade_capital_values=[75.0, 100.0],
        pair_universes=[["BTC/USD", "ETH/BTC", "ETH/USD"]],
        staleness_threshold_values=[5.0],
    )
    # Thresholds are deliberately permissive, apart from the gap ratio.
    lenient_criteria = PromotionCriteria(
        min_test_realized_pnl_usd=-1000.0,
        min_test_win_rate=0.0,
        min_test_fill_rate=0.0,
        max_test_drawdown_usd=1_000_000.0,
        max_generalization_gap_ratio=0.9,
    )

    artifacts = run_parameter_search(
        events=_events(),
        cycles_by_pair=_build_cycles(),
        parameter_grid=grid,
        starting_balances={"USD": 2000.0},
        train_ratio=0.7,
        promotion_criteria=lenient_criteria,
    )

    assert artifacts.results
    best, worst = artifacts.results[0], artifacts.results[-1]
    assert best.test_score >= worst.test_score

    first: SweepResult = best
    assert first.train_event_count > 0
    assert first.test_event_count > 0
    assert first.generalization_gap_ratio >= 0.0
    assert isinstance(first.promotion_ready, bool)
|
||||
Reference in New Issue
Block a user