feat: Add backtesting parameter sweep support and related functionality
This commit is contained in:
@@ -18,6 +18,9 @@
|
|||||||
- Added synthetic latency profiler scenarios and CLI scripts for baseline generation and regression checks.
|
- Added synthetic latency profiler scenarios and CLI scripts for baseline generation and regression checks.
|
||||||
- Added latency baseline/threshold artifacts and CI latency guardrail enforcement.
|
- Added latency baseline/threshold artifacts and CI latency guardrail enforcement.
|
||||||
- Added deterministic replay backtesting engine, CLI script, and unit coverage for JSONL event replay.
|
- Added deterministic replay backtesting engine, CLI script, and unit coverage for JSONL event replay.
|
||||||
|
- Added backtesting parameter sweep support (`scripts/backtest_sweep.py`) for theta, trade-capital, pair-universe, and staleness-threshold grid search.
|
||||||
|
- Added persisted sweep artifacts with ranked in-sample/out-of-sample results and promotion-ready candidate reporting.
|
||||||
|
- Added out-of-sample overfit guards via train/test time-window split and generalization-gap checks.
|
||||||
- Added dashboard controls for tradable pair universe selection and strategy mode/parameter configuration.
|
- Added dashboard controls for tradable pair universe selection and strategy mode/parameter configuration.
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
|
|||||||
@@ -104,7 +104,7 @@ Minimum `.env` values:
|
|||||||
```env
|
```env
|
||||||
APP_ENV=dev
|
APP_ENV=dev
|
||||||
APP_HOST=0.0.0.0
|
APP_HOST=0.0.0.0
|
||||||
APP_PORT=8000
|
APP_PORT=9090
|
||||||
LOG_LEVEL=INFO
|
LOG_LEVEL=INFO
|
||||||
LOG_JSON=true
|
LOG_JSON=true
|
||||||
DUCKDB_PATH=./data/arbitrade.duckdb
|
DUCKDB_PATH=./data/arbitrade.duckdb
|
||||||
@@ -132,8 +132,8 @@ python -m arbitrade.main
|
|||||||
|
|
||||||
Health endpoints:
|
Health endpoints:
|
||||||
|
|
||||||
- HTML: `http://localhost:8000/`
|
- HTML: `http://localhost:9090/`
|
||||||
- JSON: `http://localhost:8000/health`
|
- JSON: `http://localhost:9090/health`
|
||||||
|
|
||||||
## Database
|
## Database
|
||||||
|
|
||||||
@@ -283,12 +283,12 @@ Set these in Coolify application settings:
|
|||||||
- Build Command: leave empty.
|
- Build Command: leave empty.
|
||||||
- Install Command: leave empty.
|
- Install Command: leave empty.
|
||||||
- Start Command: leave empty unless you explicitly want to override the image default.
|
- Start Command: leave empty unless you explicitly want to override the image default.
|
||||||
- Port: `8000`
|
- Port: `9090` (Coolify uses `8000` internally)
|
||||||
|
|
||||||
### 3) Configure health check and networking
|
### 3) Configure health check and networking
|
||||||
|
|
||||||
- Health Check Path: `/health`
|
- Health Check Path: `/health`
|
||||||
- Exposed Port: `8000`
|
- Exposed Port: `9090`
|
||||||
- Use Coolify-generated domain or attach your own domain.
|
- Use Coolify-generated domain or attach your own domain.
|
||||||
|
|
||||||
### 4) Configure persistent storage
|
### 4) Configure persistent storage
|
||||||
@@ -305,7 +305,7 @@ Add runtime environment variables in Coolify (UI: Environment Variables):
|
|||||||
|
|
||||||
- `APP_ENV=prod`
|
- `APP_ENV=prod`
|
||||||
- `APP_HOST=0.0.0.0`
|
- `APP_HOST=0.0.0.0`
|
||||||
- `APP_PORT=8000`
|
- `APP_PORT=9090`
|
||||||
- `DUCKDB_PATH=/app/data/arbitrade.duckdb`
|
- `DUCKDB_PATH=/app/data/arbitrade.duckdb`
|
||||||
- `LOG_LEVEL=INFO`
|
- `LOG_LEVEL=INFO`
|
||||||
- `LOG_JSON=true`
|
- `LOG_JSON=true`
|
||||||
@@ -431,6 +431,12 @@ Run a deterministic replay backtest from a JSONL event stream:
|
|||||||
python scripts/backtest_replay.py --events path\to\replay.jsonl --starting-balances USD=1000.0
|
python scripts/backtest_replay.py --events path\to\replay.jsonl --starting-balances USD=1000.0
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Run parameter sweep with train/test split and promotion scoring:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
python scripts/backtest_sweep.py --events path\to\replay.jsonl --starting-balances USD=1000.0 --output ops/backtesting/parameter_sweep_results.json
|
||||||
|
```
|
||||||
|
|
||||||
Replay event format:
|
Replay event format:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
@@ -447,7 +453,9 @@ Notes:
|
|||||||
- Events are replayed in timestamp order.
|
- Events are replayed in timestamp order.
|
||||||
- The replay engine reuses the production detector, pre-trade validation, trade limits, and execution sequencer.
|
- The replay engine reuses the production detector, pre-trade validation, trade limits, and execution sequencer.
|
||||||
- The simulated execution path applies configurable slippage and execution latency so reports include deterministic trade/miss statistics.
|
- The simulated execution path applies configurable slippage and execution latency so reports include deterministic trade/miss statistics.
|
||||||
Latency baseline and threshold artifacts:
|
- Parameter sweep splits replay data into in-sample and out-of-sample windows, ranks configurations by out-of-sample score, and flags overfit via train/test generalization-gap checks.
|
||||||
|
- Sweep output persists ranked combinations and promotion-ready candidates for paper-trading canary promotion decisions.
|
||||||
|

Latency baseline and threshold artifacts:
|
||||||
|
|
||||||
- `ops/performance/latency_baseline.json`
|
- `ops/performance/latency_baseline.json`
|
||||||
- `ops/performance/latency_thresholds.json`
|
- `ops/performance/latency_thresholds.json`
|
||||||
|
|||||||
@@ -0,0 +1,151 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
from collections.abc import Mapping, Sequence
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from arbitrade.backtesting import load_replay_events
|
||||||
|
from arbitrade.backtesting.sweep import (
|
||||||
|
PromotionCriteria,
|
||||||
|
SweepResult,
|
||||||
|
build_parameter_grid,
|
||||||
|
persist_sweep_results,
|
||||||
|
run_parameter_search,
|
||||||
|
)
|
||||||
|
from arbitrade.detection.graph import CurrencyGraph, TriangularCycle
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_balances(raw: str) -> Mapping[str, float]:
|
||||||
|
balances: dict[str, float] = {}
|
||||||
|
for entry in raw.split(","):
|
||||||
|
stripped = entry.strip()
|
||||||
|
if not stripped:
|
||||||
|
continue
|
||||||
|
asset, value = stripped.split("=", 1)
|
||||||
|
balances[asset.strip().upper()] = float(value)
|
||||||
|
return balances
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_float_list(raw: str) -> list[float]:
|
||||||
|
values = [item.strip() for item in raw.split(",") if item.strip()]
|
||||||
|
if not values:
|
||||||
|
raise ValueError("expected at least one numeric value")
|
||||||
|
return [float(value) for value in values]
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_pair_universes(raw: str) -> list[tuple[str, ...]]:
|
||||||
|
universes: list[tuple[str, ...]] = []
|
||||||
|
for chunk in raw.split(";"):
|
||||||
|
symbols = tuple(item.strip().upper()
|
||||||
|
for item in chunk.split("|") if item.strip())
|
||||||
|
if symbols:
|
||||||
|
universes.append(symbols)
|
||||||
|
if not universes:
|
||||||
|
raise ValueError("at least one pair universe must be provided")
|
||||||
|
return universes
|
||||||
|
|
||||||
|
|
||||||
|
def _build_graph_from_symbols(symbols: Sequence[str]) -> dict[str, list[TriangularCycle]]:
    """Build the pair-to-cycles index for the given ``BASE/QUOTE`` symbols.

    Each symbol is upper-cased and split at its first ``/``; symbols without
    a ``/`` are ignored. The resulting pairs are added to a fresh
    ``CurrencyGraph`` whose triangular cycles are then indexed by pair.
    """
    graph = CurrencyGraph()
    for raw_symbol in symbols:
        pair = raw_symbol.upper()
        base, separator, quote = pair.partition("/")
        if separator:
            graph.add_pair(base, quote, pair)
    return graph.index_cycles_by_pair(graph.triangular_cycles())
|
||||||
|
|
||||||
|
|
||||||
|
def _print_top_results(results: Sequence[SweepResult], *, limit: int = 5) -> None:
|
||||||
|
print(f"Top {min(limit, len(results))} result(s) by out-of-sample score:")
|
||||||
|
for index, result in enumerate(results[:limit], start=1):
|
||||||
|
print(
|
||||||
|
"- "
|
||||||
|
f"#{index} "
|
||||||
|
f"theta={result.parameters.min_profit_threshold:.6f}, "
|
||||||
|
f"capital={result.parameters.trade_capital:.2f}, "
|
||||||
|
f"pairs={','.join(result.parameters.pair_universe)}, "
|
||||||
|
f"staleness={result.parameters.staleness_threshold_seconds:.2f}s, "
|
||||||
|
f"test_score={result.test_score:.4f}, "
|
||||||
|
f"promotion_ready={result.promotion_ready}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: Sequence[str] | None = None) -> int:
    """Run the backtesting parameter sweep CLI and return an exit code.

    Args:
        argv: Command-line arguments without the program name. ``None`` (the
            default) lets argparse read ``sys.argv[1:]``, so the existing
            no-argument call from the ``__main__`` guard keeps working.

    Returns:
        ``0`` once the sweep artifacts are written and the summary printed.

    Raises:
        SystemExit: if the replay events contain no triangular cycle, or on
            an argparse usage error.
        ValueError: propagated from the value parsers and the sweep when an
            option value is malformed.
    """
    parser = argparse.ArgumentParser(
        description="Run backtesting parameter sweep with train/test split.")
    parser.add_argument("--events", type=Path, required=True)
    parser.add_argument("--starting-balances", type=str, default="USD=1000.0")
    parser.add_argument("--theta-values", type=str,
                        default="0.0003,0.0005,0.0008")
    parser.add_argument("--trade-capital-values",
                        type=str, default="50,100,150")
    parser.add_argument(
        "--pair-universes",
        type=str,
        default="BTC/USD|ETH/BTC|ETH/USD",
        help="Semicolon-separated universes, each with | delimited pairs",
    )
    parser.add_argument("--staleness-threshold-values",
                        type=str, default="3,5,8")
    parser.add_argument("--train-ratio", type=float, default=0.7)
    parser.add_argument("--output", type=Path,
                        default=Path("ops/backtesting/parameter_sweep_results.json"))

    # Promotion thresholds; each flag maps to the PromotionCriteria field of
    # the same name.
    for flag, default in (
        ("--min-test-realized-pnl-usd", 0.0),
        ("--min-test-win-rate", 0.5),
        ("--min-test-fill-rate", 0.9),
        ("--max-test-drawdown-usd", 25.0),
        ("--max-generalization-gap-ratio", 0.5),
    ):
        parser.add_argument(flag, type=float, default=default)

    args = parser.parse_args(argv)

    events = load_replay_events(args.events)
    # The cycle graph is built from every symbol present in the replay file;
    # the sweep later narrows it per pair universe.
    symbols = sorted({event.symbol.upper() for event in events})
    cycles_by_pair = _build_graph_from_symbols(symbols)
    if not cycles_by_pair:
        raise SystemExit(
            "No triangular cycles found in supplied replay events")

    grid = build_parameter_grid(
        theta_values=_parse_float_list(args.theta_values),
        trade_capital_values=_parse_float_list(args.trade_capital_values),
        pair_universes=_parse_pair_universes(args.pair_universes),
        staleness_threshold_values=_parse_float_list(
            args.staleness_threshold_values),
    )

    artifacts = run_parameter_search(
        events=events,
        cycles_by_pair=cycles_by_pair,
        parameter_grid=grid,
        starting_balances=_parse_balances(args.starting_balances),
        train_ratio=args.train_ratio,
        promotion_criteria=PromotionCriteria(
            min_test_realized_pnl_usd=args.min_test_realized_pnl_usd,
            min_test_win_rate=args.min_test_win_rate,
            min_test_fill_rate=args.min_test_fill_rate,
            max_test_drawdown_usd=args.max_test_drawdown_usd,
            max_generalization_gap_ratio=args.max_generalization_gap_ratio,
        ),
    )

    persist_sweep_results(args.output, artifacts)

    print(f"Completed sweep combinations: {len(artifacts.results)}")
    print(f"Promotion-ready combinations: {len(artifacts.promoted)}")
    print(f"Results written: {args.output}")

    _print_top_results(artifacts.results)
    if artifacts.promoted:
        print("Promotion candidates (paper-trading canary):")
        _print_top_results(artifacts.promoted)

    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
raise SystemExit(main())
|
||||||
@@ -6,6 +6,16 @@ from arbitrade.backtesting.replay import (
|
|||||||
ReplayClock,
|
ReplayClock,
|
||||||
load_replay_events,
|
load_replay_events,
|
||||||
)
|
)
|
||||||
|
from arbitrade.backtesting.sweep import (
|
||||||
|
PromotionCriteria,
|
||||||
|
SweepArtifacts,
|
||||||
|
SweepParameters,
|
||||||
|
SweepResult,
|
||||||
|
build_parameter_grid,
|
||||||
|
persist_sweep_results,
|
||||||
|
run_parameter_search,
|
||||||
|
split_events_time_windows,
|
||||||
|
)
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"ReplayClock",
|
"ReplayClock",
|
||||||
@@ -14,4 +24,12 @@ __all__ = [
|
|||||||
"BacktestReport",
|
"BacktestReport",
|
||||||
"BacktestReplayEngine",
|
"BacktestReplayEngine",
|
||||||
"load_replay_events",
|
"load_replay_events",
|
||||||
|
"SweepParameters",
|
||||||
|
"SweepResult",
|
||||||
|
"SweepArtifacts",
|
||||||
|
"PromotionCriteria",
|
||||||
|
"split_events_time_windows",
|
||||||
|
"build_parameter_grid",
|
||||||
|
"run_parameter_search",
|
||||||
|
"persist_sweep_results",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -0,0 +1,396 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from collections.abc import Mapping, Sequence
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import UTC, datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import orjson
|
||||||
|
|
||||||
|
from arbitrade.backtesting.replay import (
|
||||||
|
BacktestConfig,
|
||||||
|
BacktestReplayEngine,
|
||||||
|
BacktestReport,
|
||||||
|
ReplayBookEvent,
|
||||||
|
)
|
||||||
|
from arbitrade.detection.graph import TriangularCycle
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class SweepParameters:
|
||||||
|
min_profit_threshold: float
|
||||||
|
trade_capital: float
|
||||||
|
pair_universe: tuple[str, ...]
|
||||||
|
staleness_threshold_seconds: float
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class PromotionCriteria:
|
||||||
|
min_test_realized_pnl_usd: float = 0.0
|
||||||
|
min_test_win_rate: float = 0.5
|
||||||
|
min_test_fill_rate: float = 0.9
|
||||||
|
max_test_drawdown_usd: float = 25.0
|
||||||
|
max_generalization_gap_ratio: float = 0.5
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class SweepResult:
|
||||||
|
parameters: SweepParameters
|
||||||
|
train_report: BacktestReport
|
||||||
|
test_report: BacktestReport
|
||||||
|
train_score: float
|
||||||
|
test_score: float
|
||||||
|
generalization_gap_ratio: float
|
||||||
|
overfit_detected: bool
|
||||||
|
promotion_ready: bool
|
||||||
|
promotion_reasons: tuple[str, ...]
|
||||||
|
train_event_count: int
|
||||||
|
test_event_count: int
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class SweepArtifacts:
|
||||||
|
results: tuple[SweepResult, ...]
|
||||||
|
promoted: tuple[SweepResult, ...]
|
||||||
|
train_window: tuple[datetime, datetime] | None
|
||||||
|
test_window: tuple[datetime, datetime] | None
|
||||||
|
|
||||||
|
|
||||||
|
def split_events_time_windows(
    events: Sequence[ReplayBookEvent],
    *,
    train_ratio: float,
) -> tuple[list[ReplayBookEvent], list[ReplayBookEvent]]:
    """Split events chronologically into a train and a test window.

    Events are ordered by ``occurred_at`` before the cut. The sort is stable,
    so events sharing a timestamp keep their input order, and input that is
    already in timestamp order is split exactly where it was before. The cut
    is at ``int(len(events) * train_ratio)``, clamped so both windows hold at
    least one event.

    Args:
        events: Replay events in any order.
        train_ratio: Fraction of events assigned to the train window; must
            lie strictly between 0 and 1.

    Returns:
        ``(train_events, test_events)`` as new lists.

    Raises:
        ValueError: if ``train_ratio`` is outside ``(0, 1)`` (NaN included)
            or fewer than two events are supplied.
    """
    # The chained comparison is False for NaN, so NaN is rejected here
    # instead of failing later inside int().
    if not 0.0 < train_ratio < 1.0:
        raise ValueError("train_ratio must be between 0 and 1")
    if len(events) < 2:
        raise ValueError("at least two events are required for time split")

    # Without ordering, a positional cut of unsorted input would put later
    # events in the train window, i.e. look-ahead leakage.
    ordered = sorted(events, key=lambda event: event.occurred_at)
    split_index = max(1, min(len(ordered) - 1, int(len(ordered) * train_ratio)))
    return ordered[:split_index], ordered[split_index:]
|
||||||
|
|
||||||
|
|
||||||
|
def build_parameter_grid(
    *,
    theta_values: Sequence[float],
    trade_capital_values: Sequence[float],
    pair_universes: Sequence[Sequence[str]],
    staleness_threshold_values: Sequence[float],
) -> list[SweepParameters]:
    """Expand the four value axes into their full cartesian product.

    Combinations are produced with theta as the outermost axis, then trade
    capital, then pair universe, and staleness threshold innermost. Each
    universe is upper-cased, de-duplicated and sorted; numeric values are
    coerced to ``float``.

    Raises:
        ValueError: if any of the four axes is empty.
    """
    for label, values in (
        ("theta_values", theta_values),
        ("trade_capital_values", trade_capital_values),
        ("pair_universes", pair_universes),
        ("staleness_threshold_values", staleness_threshold_values),
    ):
        if not values:
            raise ValueError(f"{label} must not be empty")

    return [
        SweepParameters(
            min_profit_threshold=float(theta),
            trade_capital=float(trade_capital),
            pair_universe=universe,
            staleness_threshold_seconds=float(staleness_threshold),
        )
        for theta in theta_values
        for trade_capital in trade_capital_values
        for universe in (
            tuple(sorted({pair.upper() for pair in raw_universe}))
            for raw_universe in pair_universes
        )
        for staleness_threshold in staleness_threshold_values
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def _filter_events_for_parameters(
|
||||||
|
events: Sequence[ReplayBookEvent],
|
||||||
|
*,
|
||||||
|
pair_universe: set[str],
|
||||||
|
staleness_threshold_seconds: float,
|
||||||
|
) -> list[ReplayBookEvent]:
|
||||||
|
if staleness_threshold_seconds <= 0.0:
|
||||||
|
raise ValueError("staleness_threshold_seconds must be > 0")
|
||||||
|
|
||||||
|
filtered: list[ReplayBookEvent] = []
|
||||||
|
last_seen_by_symbol: dict[str, datetime] = {}
|
||||||
|
|
||||||
|
for event in events:
|
||||||
|
symbol = event.symbol.upper()
|
||||||
|
if symbol not in pair_universe:
|
||||||
|
continue
|
||||||
|
|
||||||
|
previous = last_seen_by_symbol.get(symbol)
|
||||||
|
last_seen_by_symbol[symbol] = event.occurred_at
|
||||||
|
if previous is None:
|
||||||
|
filtered.append(event)
|
||||||
|
continue
|
||||||
|
|
||||||
|
gap_seconds = (event.occurred_at - previous).total_seconds()
|
||||||
|
if gap_seconds <= staleness_threshold_seconds:
|
||||||
|
filtered.append(event)
|
||||||
|
|
||||||
|
return filtered
|
||||||
|
|
||||||
|
|
||||||
|
def _restrict_cycles_by_pair(
|
||||||
|
cycles_by_pair: Mapping[str, list[TriangularCycle]],
|
||||||
|
*,
|
||||||
|
pair_universe: set[str],
|
||||||
|
) -> dict[str, list[TriangularCycle]]:
|
||||||
|
restricted: dict[str, list[TriangularCycle]] = {}
|
||||||
|
for pair_symbol, cycles in cycles_by_pair.items():
|
||||||
|
normalized_pair = pair_symbol.upper()
|
||||||
|
if normalized_pair not in pair_universe:
|
||||||
|
continue
|
||||||
|
|
||||||
|
kept = [cycle for cycle in cycles if all(
|
||||||
|
pair.upper() in pair_universe for pair in cycle.pairs)]
|
||||||
|
if kept:
|
||||||
|
restricted[normalized_pair] = kept
|
||||||
|
return restricted
|
||||||
|
|
||||||
|
|
||||||
|
def _score_report(report: BacktestReport) -> float:
|
||||||
|
win_rate_bonus = (report.win_rate or 0.0) * 100.0
|
||||||
|
fill_rate_bonus = (report.fill_rate or 0.0) * 50.0
|
||||||
|
return report.realized_pnl_usd + win_rate_bonus + fill_rate_bonus - report.max_drawdown_usd
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_ratio(numerator: float, denominator: float) -> float:
|
||||||
|
if denominator <= 0.0:
|
||||||
|
return 0.0 if numerator <= 0.0 else 1.0
|
||||||
|
return max(0.0, numerator / denominator)
|
||||||
|
|
||||||
|
|
||||||
|
def _evaluate_promotion(
|
||||||
|
*,
|
||||||
|
result: SweepResult,
|
||||||
|
criteria: PromotionCriteria,
|
||||||
|
) -> tuple[bool, tuple[str, ...]]:
|
||||||
|
reasons: list[str] = []
|
||||||
|
test = result.test_report
|
||||||
|
|
||||||
|
if test.realized_pnl_usd < criteria.min_test_realized_pnl_usd:
|
||||||
|
reasons.append(
|
||||||
|
"test_realized_pnl_below_threshold"
|
||||||
|
)
|
||||||
|
if (test.win_rate or 0.0) < criteria.min_test_win_rate:
|
||||||
|
reasons.append("test_win_rate_below_threshold")
|
||||||
|
if (test.fill_rate or 0.0) < criteria.min_test_fill_rate:
|
||||||
|
reasons.append("test_fill_rate_below_threshold")
|
||||||
|
if test.max_drawdown_usd > criteria.max_test_drawdown_usd:
|
||||||
|
reasons.append("test_drawdown_above_threshold")
|
||||||
|
if result.generalization_gap_ratio > criteria.max_generalization_gap_ratio:
|
||||||
|
reasons.append("generalization_gap_above_threshold")
|
||||||
|
|
||||||
|
return (not reasons), tuple(reasons)
|
||||||
|
|
||||||
|
|
||||||
|
def _run_backtest(
    *,
    events: Sequence[ReplayBookEvent],
    cycles_by_pair: Mapping[str, list[TriangularCycle]],
    available_pairs: Sequence[str],
    config: BacktestConfig,
    starting_balances: Mapping[str, float],
) -> BacktestReport:
    """Replay ``events`` through a fresh engine and return its report.

    The engine's start time is the first event's timestamp, or the current
    UTC time when there are no events. The engine's ``run`` coroutine is
    driven to completion with ``asyncio.run``.
    """
    if events:
        started_at = events[0].occurred_at
    else:
        started_at = datetime.now(UTC)

    engine = BacktestReplayEngine(
        cycles_by_pair=cycles_by_pair,
        available_pairs=available_pairs,
        config=config,
        started_at=started_at,
    )
    replay = engine.run(events, starting_balances=starting_balances)
    return asyncio.run(replay)
|
||||||
|
|
||||||
|
|
||||||
|
def run_parameter_search(
    *,
    events: Sequence[ReplayBookEvent],
    cycles_by_pair: Mapping[str, list[TriangularCycle]],
    parameter_grid: Sequence[SweepParameters],
    starting_balances: Mapping[str, float],
    train_ratio: float,
    promotion_criteria: PromotionCriteria | None = None,
    max_concurrent_trades: int = 1,
    max_depth_levels: int = 10,
    quote_asset: str = "USD",
) -> SweepArtifacts:
    """Backtest every grid point on a train/test split and rank the outcomes.

    For each parameter combination the train and test events are filtered by
    pair universe and staleness threshold, the cycle index is narrowed to the
    universe, and one backtest is run per window. A combination is skipped
    when either filtered window is empty or no cycle survives the narrowing.

    Args:
        events: Replay events to split into train and test windows.
        cycles_by_pair: Pair-to-cycles index for all available pairs.
        parameter_grid: Combinations to evaluate.
        starting_balances: Balances handed to every backtest run.
        train_ratio: Fraction of events used for the train window.
        promotion_criteria: Promotion thresholds; defaults to
            ``PromotionCriteria()``.
        max_concurrent_trades: Forwarded to ``BacktestConfig``.
        max_depth_levels: Forwarded to ``BacktestConfig``.
        quote_asset: Forwarded to ``BacktestConfig``.

    Returns:
        ``SweepArtifacts`` with all results and the promotion-ready subset,
        both sorted by descending test score, plus the first/last timestamps
        of the unfiltered train and test windows.

    Raises:
        ValueError: propagated from the split or the event filter for an
            invalid ``train_ratio``, too few events, or a non-positive
            staleness threshold.
    """
    # Function-scope import keeps this block self-contained; the module
    # header only imports ``dataclass`` from ``dataclasses``.
    from dataclasses import replace

    criteria = promotion_criteria or PromotionCriteria()
    train_events, test_events = split_events_time_windows(
        events, train_ratio=train_ratio)

    results: list[SweepResult] = []

    for parameters in parameter_grid:
        allowed_pairs = set(parameters.pair_universe)
        staleness = parameters.staleness_threshold_seconds
        filtered_train = _filter_events_for_parameters(
            train_events,
            pair_universe=allowed_pairs,
            staleness_threshold_seconds=staleness,
        )
        filtered_test = _filter_events_for_parameters(
            test_events,
            pair_universe=allowed_pairs,
            staleness_threshold_seconds=staleness,
        )
        if not filtered_train or not filtered_test:
            continue

        restricted_cycles = _restrict_cycles_by_pair(
            cycles_by_pair,
            pair_universe=allowed_pairs,
        )
        if not restricted_cycles:
            continue

        config = BacktestConfig(
            min_profit_threshold=parameters.min_profit_threshold,
            trade_capital=parameters.trade_capital,
            max_concurrent_trades=max_concurrent_trades,
            max_depth_levels=max_depth_levels,
            quote_asset=quote_asset,
        )
        # Identical for both runs, so it is computed once.
        available_pairs = sorted(allowed_pairs)

        train_report = _run_backtest(
            events=filtered_train,
            cycles_by_pair=restricted_cycles,
            available_pairs=available_pairs,
            config=config,
            starting_balances=starting_balances,
        )
        test_report = _run_backtest(
            events=filtered_test,
            cycles_by_pair=restricted_cycles,
            available_pairs=available_pairs,
            config=config,
            starting_balances=starting_balances,
        )

        train_score = _score_report(train_report)
        test_score = _score_report(test_report)
        # Only a drop from train to test counts as a gap; a test score that
        # beats the train score yields a ratio of zero.
        score_drop = max(0.0, train_score - test_score)
        generalization_gap_ratio = _safe_ratio(score_drop, abs(train_score))

        candidate = SweepResult(
            parameters=parameters,
            train_report=train_report,
            test_report=test_report,
            train_score=train_score,
            test_score=test_score,
            generalization_gap_ratio=generalization_gap_ratio,
            overfit_detected=(
                generalization_gap_ratio > criteria.max_generalization_gap_ratio),
            promotion_ready=False,
            promotion_reasons=(),
            train_event_count=len(filtered_train),
            test_event_count=len(filtered_test),
        )
        promotion_ready, promotion_reasons = _evaluate_promotion(
            result=candidate, criteria=criteria)
        # replace() copies every other field, so fields added to SweepResult
        # later cannot be forgotten here.
        results.append(
            replace(
                candidate,
                promotion_ready=promotion_ready,
                promotion_reasons=promotion_reasons,
            )
        )

    results.sort(key=lambda item: item.test_score, reverse=True)
    # Filtering the ranked list keeps the same relative order a separate
    # stable sort of the promoted subset would give.
    promoted = [item for item in results if item.promotion_ready]

    train_window: tuple[datetime, datetime] | None = None
    test_window: tuple[datetime, datetime] | None = None
    if train_events:
        train_window = (train_events[0].occurred_at,
                        train_events[-1].occurred_at)
    if test_events:
        test_window = (test_events[0].occurred_at, test_events[-1].occurred_at)

    return SweepArtifacts(
        results=tuple(results),
        promoted=tuple(promoted),
        train_window=train_window,
        test_window=test_window,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _report_to_dict(report: BacktestReport) -> dict[str, object]:
|
||||||
|
return {
|
||||||
|
"started_at": report.started_at.isoformat(),
|
||||||
|
"finished_at": report.finished_at.isoformat(),
|
||||||
|
"processed_events": report.processed_events,
|
||||||
|
"opportunities_seen": report.opportunities_seen,
|
||||||
|
"trades_executed": report.trades_executed,
|
||||||
|
"win_rate": report.win_rate,
|
||||||
|
"fill_rate": report.fill_rate,
|
||||||
|
"realized_pnl_usd": report.realized_pnl_usd,
|
||||||
|
"max_drawdown_usd": report.max_drawdown_usd,
|
||||||
|
"miss_reasons": dict(report.miss_reasons),
|
||||||
|
"execution_latency_p50_ms": report.execution_latency_p50_ms,
|
||||||
|
"execution_latency_p95_ms": report.execution_latency_p95_ms,
|
||||||
|
"execution_latency_p99_ms": report.execution_latency_p99_ms,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def persist_sweep_results(path: Path, artifacts: SweepArtifacts) -> None:
    """Write the sweep artifacts to ``path`` as indented, key-sorted JSON.

    Missing parent directories are created. The payload holds a UTC
    generation timestamp, the train/test windows (``None`` when absent) and
    one entry per result in ``artifacts.results``.
    """

    def _window_payload(
        window: tuple[datetime, datetime] | None,
    ) -> dict[str, str] | None:
        # Render a (first, last) timestamp pair, passing None through.
        if window is None:
            return None
        return {
            "started_at": window[0].isoformat(),
            "finished_at": window[1].isoformat(),
        }

    def _result_payload(result: SweepResult) -> dict[str, object]:
        # Plain-dict form of one sweep result.
        parameters = result.parameters
        return {
            "parameters": {
                "min_profit_threshold": parameters.min_profit_threshold,
                "trade_capital": parameters.trade_capital,
                "pair_universe": list(parameters.pair_universe),
                "staleness_threshold_seconds": parameters.staleness_threshold_seconds,
            },
            "train_report": _report_to_dict(result.train_report),
            "test_report": _report_to_dict(result.test_report),
            "train_score": result.train_score,
            "test_score": result.test_score,
            "generalization_gap_ratio": result.generalization_gap_ratio,
            "overfit_detected": result.overfit_detected,
            "promotion_ready": result.promotion_ready,
            "promotion_reasons": list(result.promotion_reasons),
            "train_event_count": result.train_event_count,
            "test_event_count": result.test_event_count,
        }

    payload = {
        "generated_at": datetime.now(UTC).isoformat(),
        "train_window": _window_payload(artifacts.train_window),
        "test_window": _window_payload(artifacts.test_window),
        "results": [_result_payload(result) for result in artifacts.results],
    }

    path.parent.mkdir(parents=True, exist_ok=True)
    encoded = orjson.dumps(
        payload, option=orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS)
    path.write_bytes(encoded)
|
||||||
@@ -0,0 +1,102 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import UTC, datetime, timedelta
|
||||||
|
|
||||||
|
from arbitrade.backtesting.replay import ReplayBookEvent
|
||||||
|
from arbitrade.backtesting.sweep import (
|
||||||
|
PromotionCriteria,
|
||||||
|
SweepResult,
|
||||||
|
build_parameter_grid,
|
||||||
|
run_parameter_search,
|
||||||
|
split_events_time_windows,
|
||||||
|
)
|
||||||
|
from arbitrade.detection.graph import CurrencyGraph
|
||||||
|
from arbitrade.exchange.models import BookLevel
|
||||||
|
|
||||||
|
|
||||||
|
def _build_cycles() -> dict[str, list]:
    """Return the pair-to-cycles index for a USD/BTC/ETH triangle."""
    graph = CurrencyGraph()
    for base, quote, symbol in (
        ("USD", "BTC", "BTC/USD"),
        ("BTC", "ETH", "ETH/BTC"),
        ("ETH", "USD", "ETH/USD"),
    ):
        graph.add_pair(base, quote, symbol)
    cycles = graph.triangular_cycles()
    return graph.index_cycles_by_pair(cycles)
|
||||||
|
|
||||||
|
|
||||||
|
def _events() -> list[ReplayBookEvent]:
    """Build 12 one-second ticks with one book event per pair (36 events).

    Within each tick the events come in the order BTC/USD, ETH/BTC, ETH/USD,
    each with a single bid level and a single ask level of volume 10.
    """
    base_time = datetime(2026, 6, 1, 12, 0, tzinfo=UTC)
    # (symbol, bid price, ask price) repeated on every tick.
    quotes = (
        ("BTC/USD", 99.5, 100.0),
        ("ETH/BTC", 0.051, 0.050),
        ("ETH/USD", 110.0, 110.5),
    )
    return [
        ReplayBookEvent(
            occurred_at=base_time + timedelta(seconds=offset),
            symbol=symbol,
            bids=(BookLevel(price=bid, volume=10.0),),
            asks=(BookLevel(price=ask, volume=10.0),),
        )
        for offset in range(12)
        for symbol, bid, ask in quotes
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def test_split_events_time_windows_returns_non_empty_train_and_test() -> None:
    """The split partitions all events at the expected index, in time order."""
    events = _events()
    train, test = split_events_time_windows(events, train_ratio=0.7)

    assert train
    assert test
    assert train[-1].occurred_at <= test[0].occurred_at
    # 36 events at ratio 0.7 -> int(25.2) == 25 train events; pinning the
    # sizes catches a split that loses, duplicates or misplaces events.
    assert len(train) == 25
    assert len(train) + len(test) == len(events)
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_parameter_grid_expands_combinations() -> None:
    """The grid is the full product of the axes, in a stable order."""
    grid = build_parameter_grid(
        theta_values=[0.0005, 0.001],
        trade_capital_values=[100.0],
        pair_universes=[["BTC/USD", "ETH/BTC", "ETH/USD"]],
        staleness_threshold_values=[3.0, 5.0],
    )

    assert len(grid) == 4
    # Theta is the outer axis and staleness the inner one.
    assert [
        (item.min_profit_threshold, item.staleness_threshold_seconds)
        for item in grid
    ] == [(0.0005, 3.0), (0.0005, 5.0), (0.001, 3.0), (0.001, 5.0)]
    assert all(item.trade_capital == 100.0 for item in grid)
    assert all(
        item.pair_universe == ("BTC/USD", "ETH/BTC", "ETH/USD") for item in grid
    )
|
||||||
|
|
||||||
|
|
||||||
|
def test_run_parameter_search_produces_ranked_results_with_overfit_guard() -> None:
    """Results are ranked by test score and carry consistent guard flags."""
    max_gap_ratio = 0.9
    artifacts = run_parameter_search(
        events=_events(),
        cycles_by_pair=_build_cycles(),
        parameter_grid=build_parameter_grid(
            theta_values=[0.0005, 0.001],
            trade_capital_values=[75.0, 100.0],
            pair_universes=[["BTC/USD", "ETH/BTC", "ETH/USD"]],
            staleness_threshold_values=[5.0],
        ),
        starting_balances={"USD": 2000.0},
        train_ratio=0.7,
        promotion_criteria=PromotionCriteria(
            min_test_realized_pnl_usd=-1000.0,
            min_test_win_rate=0.0,
            min_test_fill_rate=0.0,
            max_test_drawdown_usd=1_000_000.0,
            max_generalization_gap_ratio=max_gap_ratio,
        ),
    )

    assert artifacts.results
    # Every adjacent pair must be in descending order, not just the ends.
    assert all(
        earlier.test_score >= later.test_score
        for earlier, later in zip(artifacts.results, artifacts.results[1:])
    )

    for result in artifacts.results:
        assert result.train_event_count > 0
        assert result.test_event_count > 0
        assert result.generalization_gap_ratio >= 0.0
        assert isinstance(result.promotion_ready, bool)
        # The overfit flag must agree with the configured gap threshold.
        assert result.overfit_detected == (
            result.generalization_gap_ratio > max_gap_ratio)
        # A result is promotion-ready exactly when no criterion failed.
        assert result.promotion_ready == (not result.promotion_reasons)

    # The promoted tuple holds exactly the promotion-ready results.
    assert all(item.promotion_ready for item in artifacts.promoted)
    assert len(artifacts.promoted) == sum(
        1 for item in artifacts.results if item.promotion_ready)
|
||||||
Reference in New Issue
Block a user