From 38e1d644378579c0d83a776faaad4786fb9aabcc Mon Sep 17 00:00:00 2001 From: zwitschi Date: Tue, 2 Jun 2026 09:28:22 +0200 Subject: [PATCH] feat: add backtesting functionality with UI and API endpoints - Introduced backtesting page and fragment in the dashboard for running backtests and viewing recent reports. - Implemented backtest run logic with configuration options including event path, starting balances, trade capital, and fee profiles. - Added recent backtest reports storage and retrieval. - Created a new strategy module for statistical arbitrage experiments with validation on configuration parameters. - Updated settings to include parameters for the statistical arbitrage strategy. - Enhanced dashboard controls to support the new strategy mode. - Added unit tests for backtesting functionality and strategy validation. - Updated templates for backtesting UI integration. --- .env.example | 5 + CHANGELOG.md | 7 + README.md | 124 +------- docs/architecture/arc42.md | 156 ++++++++++ docs/architecture/current-implementation.md | 47 +++ src/arbitrade/api/app.py | 1 + src/arbitrade/api/routes.py | 284 +++++++++++++++++- src/arbitrade/config/settings.py | 148 ++++++--- src/arbitrade/strategy/__init__.py | 5 + src/arbitrade/strategy/stat_arb.py | 152 ++++++++++ tests/test_dashboard.py | 69 ++++- tests/unit/test_settings_validation.py | 17 ++ tests/unit/test_stat_arb_experiment.py | 66 ++++ web/templates/backtesting.html | 24 ++ web/templates/dashboard.html | 1 + web/templates/partials/backtesting_panel.html | 142 +++++++++ web/templates/partials/controls.html | 6 + 17 files changed, 1089 insertions(+), 165 deletions(-) create mode 100644 docs/architecture/arc42.md create mode 100644 docs/architecture/current-implementation.md create mode 100644 src/arbitrade/strategy/__init__.py create mode 100644 src/arbitrade/strategy/stat_arb.py create mode 100644 tests/unit/test_stat_arb_experiment.py create mode 100644 web/templates/backtesting.html create mode 100644 
web/templates/partials/backtesting_panel.html diff --git a/.env.example b/.env.example index 75c25a3..a0185df 100644 --- a/.env.example +++ b/.env.example @@ -49,3 +49,8 @@ CUMULATIVE_LOSS_LIMIT_USD=10.0 MAX_SOURCE_LATENCY_MS= MAX_APPLY_LATENCY_MS= MAX_CONSECUTIVE_FAILURES= +STRATEGY_ENABLE_STAT_ARB_EXPERIMENT=false +STRATEGY_STAT_ARB_LOOKBACK_WINDOW=120 +STRATEGY_STAT_ARB_ENTRY_ZSCORE=2.0 +STRATEGY_STAT_ARB_EXIT_ZSCORE=0.5 +STRATEGY_STAT_ARB_MAX_HOLDING_SECONDS=900.0 diff --git a/CHANGELOG.md b/CHANGELOG.md index 4298045..511ffed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,11 @@ - Added persisted sweep artifacts with ranked in-sample/out-of-sample results and promotion-ready candidate reporting. - Added out-of-sample overfit guards via train/test time-window split and generalization-gap checks. - Added dashboard controls for tradable pair universe selection and strategy mode/parameter configuration. +- Added feature-flagged statistical arbitrage experiment scaffold (`src/arbitrade/strategy/stat_arb.py`) with mean-reversion signal lifecycle. +- Added strategy feature-flag settings for statistical arbitrage experiment activation and z-score/holding-window controls. +- Added unit coverage for statistical arbitrage experiment behavior and new strategy settings validation rules. +- Added dedicated backtesting dashboard page (`/dashboard/backtesting`) with run controls for replay path, fee profile, balances, slippage, and execution latency. +- Added backtesting run/report endpoints (`/dashboard/backtesting/run`, `/dashboard/api/backtesting/reports`) and recent-report history surfaced in UI. ### Changed @@ -33,6 +38,8 @@ - Added backtesting usage and replay format documentation to README. - Dashboard controls now surface tradable pairs and strategy config snapshot values. - CI now publishes `git.allucanget.biz/allucanget/arbitrade:latest`, and README now documents Coolify image deployment with runtime environment variables managed in Coolify. 
+- Dashboard strategy-mode validation now allows `stat_arb_experiment` only when feature flag is enabled. +- README now documents deferred cross-exchange architecture requirements, risk assumptions, and promotion milestones for strategy expansion. ### Removed diff --git a/README.md b/README.md index 9a31440..c16b974 100644 --- a/README.md +++ b/README.md @@ -349,125 +349,11 @@ Example pushed image tag shape: git.allucanget.biz/allucanget/arbitrade:latest ``` -## Project Layout +## Architecture Docs -```text -arbitrade/ -├── .gitea/workflows/ci.yml -├── .github/instructions/TODO.md -├── PLAN.md -├── pyproject.toml -├── src/arbitrade/ -│ ├── api/ -│ ├── config/ -│ ├── storage/ -│ ├── logging_setup.py -│ └── main.py -├── tests/ -└── web/templates/ -``` +Implementation detail moved into arc42 docs: -## Next Work +- [arc42 overview](docs/architecture/arc42.md) - system context, building blocks, runtime, deployment, quality goals, risks. +- [current implementation snapshot](docs/architecture/current-implementation.md) - codebase state, active routes, backtesting, strategy flags, deployment flow. -Next planned implementation slice: - -- Kraken REST client skeleton -- native Kraken WebSocket client -- in-memory order book cache -- latency instrumentation - -## Troubleshooting - -PowerShell blocks activation script: - -```powershell -Set-ExecutionPolicy -Scope Process -ExecutionPolicy RemoteSigned -``` - -Then activate again: - -```powershell -.\.venv\Scripts\Activate.ps1 -``` - -If app import fails, confirm editable install ran: - -```powershell -uv pip install -e .[dev] -``` - -If DuckDB file missing, start app once or create `data/` directory manually. - -## Security Hardening - -Threat model notes: - -- Primary risk surfaces: environment secrets, dashboard auth credentials, exchange API key scope, and dependency supply chain. -- Assumed attacker model: leaked repository content, leaked CI logs/artifacts, or unauthorized dashboard access. 
-- High-impact outcomes to prevent: credential exfiltration, unauthorized withdrawals, and unsafe live-trading control changes. - -Hardening checklist: - -- Use least-privilege Kraken API keys: query + trade only; never enable withdrawal. -- Rotate API keys immediately if secret scan flags a potential exposure. -- Keep dashboard auth enabled in non-local environments and avoid default/shared credentials. -- Run `pip-audit -r requirements/latest-runtime.in` in CI; treat vulnerability findings as release blockers. -- Run `python scripts/security_scan.py` before release and after major merges. -- Store secrets in environment/secret manager; never commit `.env` or key material. - -## Performance Hardening - -Profile scenarios: - -- `book_update_burst` -- `execution_spike` -- `reconnect_storm` - -## Backtesting - -Run a deterministic replay backtest from a JSONL event stream: - -```powershell -python scripts/backtest_replay.py --events path\to\replay.jsonl --starting-balances USD=1000.0 -``` - -Run parameter sweep with train/test split and promotion scoring: - -```powershell -python scripts/backtest_sweep.py --events path\to\replay.jsonl --starting-balances USD=1000.0 --output ops/backtesting/parameter_sweep_results.json -``` - -Replay event format: - -```json -{ - "timestamp": "2026-06-01T12:00:00Z", - "symbol": "BTC/USD", - "bids": [[100.0, 1.0]], - "asks": [[101.0, 1.0]] -} -``` - -Notes: - -- Events are replayed in timestamp order. -- The replay engine reuses the production detector, pre-trade validation, trade limits, and execution sequencer. -- The simulated execution path applies configurable slippage and execution latency so reports include deterministic trade/miss statistics. -- Parameter sweep splits replay data into in-sample and out-of-sample windows, ranks configurations by out-of-sample score, and flags overfit via train/test generalization-gap checks. 
-- Sweep output persists ranked combinations and promotion-ready candidates for paper-trading canary promotion decisions. -- Latency baseline and threshold artifacts: - -- `ops/performance/latency_baseline.json` -- `ops/performance/latency_thresholds.json` - -CI guardrail: - -- `.gitea/workflows/ci.yml` runs `scripts/check_latency_regression.py` and fails on regression. - -Measured optimization impact (2026-06-01): - -- `MetricsCalculator.compute()` switched from Python row scans to DuckDB SQL aggregates/quantiles. -- Benchmark (`scripts/benchmark_metrics_compute.py`): - - Python scan avg: `12.623 ms` - - SQL aggregate avg: `11.039 ms` - - Speedup: `1.14x` +For deep architecture detail, use the docs linked above rather than this README. diff --git a/docs/architecture/arc42.md b/docs/architecture/arc42.md new file mode 100644 index 0000000..c7057f4 --- /dev/null +++ b/docs/architecture/arc42.md @@ -0,0 +1,156 @@ +# Arbitrade Architecture Overview (arc42) + +## 1. Introduction and Goals + +Arbitrade is a Python 3.12+ cryptocurrency arbitrage bot for Kraken, focused on triangular arbitrage on a single exchange. The system is designed for low-latency detection, configurable risk control, replayable backtesting, and operator-visible dashboard control. + +Primary goals: + +- Detect and execute triangular opportunities on Kraken with fee/slippage-aware math. +- Keep hot-path latency low with incremental order-book updates and event-driven scoring. +- Persist operational data in DuckDB for dev, test, and prod. +- Provide operator controls, audit trail, and alerting through a server-rendered dashboard. +- Support backtesting, parameter sweeps, and deferred experimental strategy work behind feature flags. + +## 2. Constraints + +- Python 3.12+ runtime. +- Native Kraken WebSocket on the hot path. +- HTMX + Jinja2 UI, no SPA build step. +- DuckDB everywhere. +- Self-hosted Gitea Actions CI and Gitea registry. +- Windows development support. 
+- Secrets must stay out of the repository. + +## 3. Context and Scope + +### 3.1 Business Context + +The bot consumes Kraken market data, detects opportunities, and executes trades or paper-trades depending on configuration. Operators monitor and control the system through a dashboard and alerting channels. + +### 3.2 Technical Context + +- Kraken REST + WebSocket provide market data and execution. +- FastAPI serves HTML fragments, JSON endpoints, and SSE streams. +- DuckDB stores trades, opportunities, snapshots, audit events, and runtime state. +- Coolify can deploy the published image using environment variables and persistent storage. + +## 4. Solution Strategy + +- Use an incremental currency graph to re-score only cycles touched by a changed pair. +- Use top-of-book plus depth-aware pricing and configurable fee/slippage buffers. +- Use a single-process asyncio model with uvloop where available. +- Keep strategy, risk, execution, and backtesting logic reusable across paper and replay modes. +- Expose configuration through the dashboard and environment variables. + +## 5. Building Block View + +### 5.1 Runtime Blocks + +- `api/` - FastAPI app, routes, dashboard fragments, backtesting endpoints. +- `exchange/` - Kraken REST and WebSocket integration. +- `market_data/` - live order-book state and ingestion. +- `detection/` - triangular graph and incremental detector. +- `risk/` - pre-trade and trade-limit guards. +- `execution/` - multi-leg trade sequencing. +- `backtesting/` - replay engine, parameter sweep, experiment scaffolds. +- `strategy/` - experimental strategy modules such as stat-arb. +- `storage/` - DuckDB schema and repositories. +- `alerting/` - multi-channel notifications. +- `runtime/` - startup recovery and graceful shutdown. + +### 5.2 Important Dependencies + +- `fastapi`, `uvicorn`, `jinja2`, `htmx`-driven templates. +- `orjson` for low-alloc parsing. +- `sortedcontainers` for book state. +- `duckdb` for persistence and analytics. 
+- `pydantic` / `pydantic-settings` for typed configuration. +- `cryptography` / keyring for secret handling. + +## 6. Runtime View + +### 6.1 Live Trading Flow + +1. Kraken WS delivers book updates. +2. Order book updates in memory. +3. Incremental detector scores impacted cycles. +4. Risk manager validates the opportunity. +5. Execution sequencer places legs if approved. +6. Trades and snapshots persist to DuckDB. +7. Dashboard and alerts reflect state changes. + +### 6.2 Dashboard Control Flow + +- `/dashboard/control/*` mutates runtime state. +- `/dashboard/fragment/*` renders HTMX partials. +- `/dashboard/stream/*` provides SSE live updates. +- `/dashboard/backtesting` provides a dedicated replay control page. + +### 6.3 Backtesting Flow + +1. User selects JSONL replay file and run parameters. +2. Replay engine loads ordered book events. +3. Detector, risk, and execution logic run in simulation mode. +4. Report is stored in memory for recent UI display. +5. Parameter sweeps split data into train/test windows, rank results, and flag overfit. + +## 7. Deployment View + +### 7.1 Local Development + +- `uv venv` for environment creation. +- `uv pip install -e .[dev]` for editable install. +- `docker compose up --build` for local container workflow. + +### 7.2 CI/CD + +- Gitea Actions runs lint, tests, security checks, latency guards, and image publish. +- CI publishes `git.allucanget.biz/allucanget/arbitrade:latest`. + +### 7.3 Coolify + +- Deploy from the published image. +- Configure runtime via environment variables. +- Mount persistent storage at `/app/data` for DuckDB. + +## 8. Cross-Cutting Concepts + +- Staleness checks prevent stale book execution. +- Kill switch halts execution immediately. +- Audit trail records dashboard and runtime decisions. +- Alerting spans Telegram, Discord, and email. +- Feature flags gate experimental strategy code. +- Config is environment-driven and validated at startup. + +## 9. 
Architecture Decisions + +- Native Kraken WS instead of a generic exchange abstraction on the hot path. +- DuckDB as the single database engine. +- HTMX + Jinja2 instead of SPA frontend. +- Backtesting reuses production detector/risk/execution logic. +- Experimental stat-arb stays behind a feature flag. +- Published image is the deployment artifact; Coolify owns runtime env vars. + +## 10. Quality Requirements + +- Low latency on book-update-to-decision path. +- Safe startup and restart behavior. +- Strong operator visibility. +- Reproducible backtests and sweeps. +- Secrets protection and strict validation. + +## 11. Risks and Technical Debt + +- Exchange API schema changes. +- Spread decay and execution slippage. +- Cross-venue strategy complexity if/when enabled. +- UI and backtesting paths can drift if not kept aligned with production logic. + +## 12. Glossary + +- WS: WebSocket. +- HTMX: HTML-over-the-wire UI library. +- SSE: Server-Sent Events. +- DuckDB: Embedded analytical database used for all environments. +- Stat arb: Statistical arbitrage, currently experimental and feature-flagged. diff --git a/docs/architecture/current-implementation.md b/docs/architecture/current-implementation.md new file mode 100644 index 0000000..6e0e64a --- /dev/null +++ b/docs/architecture/current-implementation.md @@ -0,0 +1,47 @@ +# Current Implementation Snapshot + +This document summarizes the code that exists now, not the original plan. + +## Runtime + +- FastAPI app starts from [src/arbitrade/api/app.py](../../src/arbitrade/api/app.py). +- Settings come from `pydantic-settings` in [src/arbitrade/config/settings.py](../../src/arbitrade/config/settings.py). +- DuckDB is initialized and migrated on startup. +- Runtime recovery persists and restores control state and snapshots. + +## Market Data and Detection + +- Kraken market data is handled by native WS and thin REST code. 
+- Incremental triangular detector is implemented in [src/arbitrade/detection/engine.py](../../src/arbitrade/detection/engine.py). +- Currency graph and cycle indexing live in [src/arbitrade/detection/graph.py](../../src/arbitrade/detection/graph.py). + +## Execution and Risk + +- Multi-leg execution sequencer exists for triangular cycles. +- Pre-trade validation and trade-limit guards are wired into execution flow. +- Kill switch and stop conditions are supported. + +## Dashboard + +- Server-rendered dashboard uses FastAPI + Jinja2 + HTMX. +- Live metrics, overview, controls, charts, and audit fragments are exposed as separate endpoints. +- Dedicated backtesting page exists at `/dashboard/backtesting`. + +## Backtesting + +- Replay engine lives in [src/arbitrade/backtesting/replay.py](../../src/arbitrade/backtesting/replay.py). +- Parameter sweep runner lives in [src/arbitrade/backtesting/sweep.py](../../src/arbitrade/backtesting/sweep.py). +- Backtesting UI runs replay from a JSONL file, stores recent reports in app state, and exposes a recent-reports API. +- Experimental stat-arb scaffold lives in [src/arbitrade/strategy/stat_arb.py](../../src/arbitrade/strategy/stat_arb.py) and is gated by feature flag. + +## Deployment + +- Dockerfile installs runtime dependencies from `requirements/latest-runtime.in`. +- CI publishes `git.allucanget.biz/allucanget/arbitrade:latest`. +- Coolify deploys the prebuilt image and owns runtime env vars and persistent storage. + +## Current Gaps + +- Cross-exchange arbitrage remains deferred. +- Stat-arb is experimental, not part of default live strategy. +- Backtesting UI is functional but still a single-run/report workflow, not a full job queue. 
diff --git a/src/arbitrade/api/app.py b/src/arbitrade/api/app.py index a39fd29..d7dd11e 100644 --- a/src/arbitrade/api/app.py +++ b/src/arbitrade/api/app.py @@ -35,6 +35,7 @@ def create_app(settings: Settings) -> FastAPI: app.state.audit_repository = AuditRepository(db) app.state.runtime_state_repository = RuntimeStateRepository(db) app.state.alert_notifier = build_notifier_from_settings(settings) + app.state.backtest_recent_reports = [] app.state.dashboard_controls = DashboardControlState( is_running=not settings.kill_switch_active, ) diff --git a/src/arbitrade/api/routes.py b/src/arbitrade/api/routes.py index 627cb31..ff5c36d 100644 --- a/src/arbitrade/api/routes.py +++ b/src/arbitrade/api/routes.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +from asyncio import Lock from collections.abc import AsyncIterator from datetime import UTC, datetime from pathlib import Path @@ -15,6 +16,8 @@ from fastapi.templating import Jinja2Templates from arbitrade.alerting.notifier import SupportsAlerts, SupportsAlertStatus from arbitrade.api.auth import require_dashboard_auth from arbitrade.api.control_state import DashboardControlState +from arbitrade.backtesting.replay import BacktestConfig, BacktestReplayEngine, load_replay_events +from arbitrade.detection.graph import CurrencyGraph, TriangularCycle from arbitrade.storage.repositories import AuditRecord, AuditRepository router = APIRouter(dependencies=[Depends(require_dashboard_auth)]) @@ -22,6 +25,8 @@ public_router = APIRouter() templates = Jinja2Templates( directory=str(Path(__file__).resolve().parents[3] / "web" / "templates") ) +_BACKTEST_ROOT = Path(__file__).resolve().parents[3] +_BACKTEST_RUN_LOCK = Lock() def _format_metric(value: float | None, *, precision: int = 2, suffix: str = "") -> str: @@ -295,6 +300,8 @@ def _dashboard_controls(request: Request) -> dict[str, object]: alerts_last_channel_results = [ str(item) for item in cast(list[object], alert_status.get("last_channel_results", [])) ] + 
strategy_stat_arb_enabled = bool( + getattr(rs, "strategy_enable_stat_arb_experiment", False)) return { "execution_status": "running" if ctl.is_running else "stopped", @@ -320,6 +327,7 @@ def _dashboard_controls(request: Request) -> dict[str, object]: "tradable_pairs_display": tpd, "tradable_pairs_value": ", ".join(ctl.tradable_pairs), "strategy_mode": ctl.strategy_mode, + "strategy_stat_arb_enabled": strategy_stat_arb_enabled, "strategy_profit_threshold": f"{ctl.strategy_profit_threshold:.6f}", "strategy_max_depth_levels": str(ctl.strategy_max_depth_levels), "updated_at": ctl.updated_at.isoformat(), @@ -354,6 +362,115 @@ def _parse_comma_separated_list(value: str | None) -> list[str]: return items +def _normalize_fee_profile(profile: str) -> str: + return profile.strip().lower().replace("-", "_") + + +def _fee_rate_for_profile(profile: str, custom_fee_rate: float | None) -> float: + normalized = _normalize_fee_profile(profile) + profile_map = { + "standard": 0.0026, + "maker_heavy": 0.0016, + "taker_heavy": 0.0035, + } + if normalized == "custom": + if custom_fee_rate is None: + raise ValueError("custom fee profile requires custom_fee_rate") + if custom_fee_rate < 0.0: + raise ValueError("custom_fee_rate must be >= 0") + return custom_fee_rate + if normalized not in profile_map: + valid = ", ".join(sorted(list(profile_map.keys()) + ["custom"])) + raise ValueError(f"fee_profile must be one of: {valid}") + return profile_map[normalized] + + +def _parse_balances(raw: str) -> dict[str, float]: + balances: dict[str, float] = {} + for entry in raw.split(","): + stripped = entry.strip() + if not stripped: + continue + if "=" not in stripped: + raise ValueError("starting_balances must be in ASSET=value format") + asset, value = stripped.split("=", 1) + balances[asset.strip().upper()] = float(value) + if not balances: + raise ValueError("starting_balances must include at least one balance") + return balances + + +def _resolve_workspace_path(raw: str) -> Path: + candidate = 
Path(raw.strip()) + if not candidate.is_absolute(): + candidate = (_BACKTEST_ROOT / candidate).resolve() + else: + candidate = candidate.resolve() + return candidate + + +def _display_path(path: Path) -> str: + try: + return str(path.relative_to(_BACKTEST_ROOT)) + except ValueError: + return str(path) + + +def _build_cycles_from_events( + symbols: set[str], +) -> tuple[dict[str, list[TriangularCycle]], list[str]]: + graph = CurrencyGraph() + for symbol in sorted(symbols): + if "/" not in symbol: + continue + base, quote = symbol.upper().split("/", 1) + graph.add_pair(base, quote, f"{base}/{quote}") + cycles = graph.triangular_cycles() + return graph.index_cycles_by_pair(cycles), sorted(symbols) + + +def _recent_backtest_reports(request: Request) -> list[dict[str, object]]: + reports = getattr(request.app.state, "backtest_recent_reports", []) + if isinstance(reports, list): + return cast(list[dict[str, object]], reports) + return [] + + +def _backtesting_panel_context( + request: Request, + *, + status: str = "idle", + message: str = "Configure a replay run and execute backtest.", + latest_report: dict[str, object] | None = None, + defaults: dict[str, str] | None = None, +) -> dict[str, object]: + default_values = { + "events_path": "", + "starting_balances": "USD=1000.0", + "trade_capital": "100.0", + "min_profit_threshold": "0.0005", + "fee_profile": "standard", + "custom_fee_rate": "", + "slippage_bps": "4.0", + "execution_latency_ms": "20.0", + } + if defaults is not None: + default_values.update(defaults) + + reports = _recent_backtest_reports(request) + latest = latest_report or (reports[0] if reports else None) + + return { + "status": status, + "message": message, + "latest_report": latest, + "recent_reports": reports, + "run_endpoint": "/dashboard/backtesting/run", + "reports_endpoint": "/dashboard/api/backtesting/reports", + **default_values, + } + + async def _dashboard_response( request: Request, template_name: str = "dashboard.html" ) -> HTMLResponse: 
@@ -384,6 +501,29 @@ async def dashboard(request: Request) -> HTMLResponse: return await _dashboard_response(request) +@router.get("/dashboard/backtesting", response_class=HTMLResponse) +async def dashboard_backtesting_page(request: Request) -> HTMLResponse: + return templates.TemplateResponse( + request=request, + name="backtesting.html", + context={ + "title": "Arbitrade Backtesting", + "request": request, + "panel_endpoint": "/dashboard/fragment/backtesting", + "dashboard_endpoint": "/dashboard", + }, + ) + + +@router.get("/dashboard/fragment/backtesting", response_class=HTMLResponse) +async def dashboard_backtesting_fragment(request: Request) -> HTMLResponse: + return templates.TemplateResponse( + request=request, + name="partials/backtesting_panel.html", + context={"request": request, **_backtesting_panel_context(request)}, + ) + + @router.get("/dashboard/fragment/metrics", response_class=HTMLResponse) async def dashboard_metrics(request: Request) -> HTMLResponse: return templates.TemplateResponse( @@ -439,6 +579,143 @@ async def dashboard_audit_recent(request: Request) -> JSONResponse: return JSONResponse(_dashboard_audit(request, limit=25)) +@router.get("/dashboard/api/backtesting/reports", response_class=JSONResponse) +async def dashboard_backtesting_reports(request: Request) -> JSONResponse: + return JSONResponse( + { + "generated_at": datetime.now(UTC).isoformat(), + "reports": _recent_backtest_reports(request), + } + ) + + +@router.post("/dashboard/backtesting/run", response_class=HTMLResponse) +async def dashboard_backtesting_run(request: Request) -> HTMLResponse: + form = _parse_form_body(await request.body()) + defaults = { + "events_path": form.get("events_path", ""), + "starting_balances": form.get("starting_balances", "USD=1000.0"), + "trade_capital": form.get("trade_capital", "100.0"), + "min_profit_threshold": form.get("min_profit_threshold", "0.0005"), + "fee_profile": _normalize_fee_profile(form.get("fee_profile", "standard")), + 
"custom_fee_rate": form.get("custom_fee_rate", ""), + "slippage_bps": form.get("slippage_bps", "4.0"), + "execution_latency_ms": form.get("execution_latency_ms", "20.0"), + } + + try: + events_path = _resolve_workspace_path(defaults["events_path"]) + if not events_path.exists() or not events_path.is_file(): + raise ValueError( + "events_path must reference an existing JSONL file") + + events = load_replay_events(events_path) + if not events: + raise ValueError("events file contains no replay events") + + custom_fee_rate = ( + float(defaults["custom_fee_rate"] + ) if defaults["custom_fee_rate"].strip() else None + ) + fee_rate = _fee_rate_for_profile( + defaults["fee_profile"], custom_fee_rate) + starting_balances = _parse_balances(defaults["starting_balances"]) + + trade_capital = float(defaults["trade_capital"]) + min_profit_threshold = float(defaults["min_profit_threshold"]) + slippage_bps = float(defaults["slippage_bps"]) + execution_latency_ms = float(defaults["execution_latency_ms"]) + + cycles_by_pair, available_pairs = _build_cycles_from_events( + {event.symbol.upper() for event in events} + ) + if not cycles_by_pair: + raise ValueError( + "unable to derive triangular cycles from provided events") + + config = BacktestConfig( + fee_rate=fee_rate, + min_profit_threshold=min_profit_threshold, + trade_capital=trade_capital, + slippage_bps=slippage_bps, + execution_latency_ms=execution_latency_ms, + ) + + async with _BACKTEST_RUN_LOCK: + engine = BacktestReplayEngine( + cycles_by_pair=cycles_by_pair, + available_pairs=available_pairs, + config=config, + started_at=events[0].occurred_at, + ) + report = await engine.run(events, starting_balances=starting_balances) + + report_item: dict[str, object] = { + "run_at": datetime.now(UTC).isoformat(), + "events_path": _display_path(events_path), + "status": "completed", + "config": { + "trade_capital": trade_capital, + "min_profit_threshold": min_profit_threshold, + "fee_profile": defaults["fee_profile"], + "fee_rate": 
fee_rate, + "slippage_bps": slippage_bps, + "execution_latency_ms": execution_latency_ms, + }, + "report": { + "processed_events": report.processed_events, + "opportunities_seen": report.opportunities_seen, + "trades_executed": report.trades_executed, + "win_rate": report.win_rate, + "fill_rate": report.fill_rate, + "realized_pnl_usd": report.realized_pnl_usd, + "max_drawdown_usd": report.max_drawdown_usd, + "miss_reasons": dict(report.miss_reasons), + "execution_latency_p50_ms": report.execution_latency_p50_ms, + "execution_latency_p95_ms": report.execution_latency_p95_ms, + "execution_latency_p99_ms": report.execution_latency_p99_ms, + }, + } + + reports = _recent_backtest_reports(request) + reports.insert(0, report_item) + del reports[20:] + + _record_audit( + request, + actor="dashboard_user", + event_type="dashboard.backtesting.run", + decision="completed", + payload={ + "events_path": report_item["events_path"], + "processed_events": report.processed_events, + "trades_executed": report.trades_executed, + "realized_pnl_usd": report.realized_pnl_usd, + }, + ) + + context = _backtesting_panel_context( + request, + status="completed", + message="Backtest run completed successfully.", + latest_report=report_item, + defaults=defaults, + ) + except ValueError as exc: + context = _backtesting_panel_context( + request, + status="failed", + message=str(exc), + defaults=defaults, + ) + + return templates.TemplateResponse( + request=request, + name="partials/backtesting_panel.html", + context={"request": request, **context}, + ) + + @router.post("/dashboard/control/start", response_class=HTMLResponse) async def dashboard_control_start(request: Request) -> HTMLResponse: controls = _dashboard_controls_state(request) @@ -543,8 +820,11 @@ async def dashboard_control_config(request: Request) -> HTMLResponse: ctl.tradable_pairs = _parse_comma_separated_list(form_pairs) if "strategy_mode" in form and form["strategy_mode"].strip(): strategy_mode = 
form["strategy_mode"].strip().lower() - if strategy_mode not in {"incremental", "paper", "live"}: - e = "strategy_mode must be one of: incremental, paper, live" + allowed_strategy_modes = {"incremental", "paper", "live"} + if bool(getattr(rs, "strategy_enable_stat_arb_experiment", False)): + allowed_strategy_modes.add("stat_arb_experiment") + if strategy_mode not in allowed_strategy_modes: + e = f"strategy_mode must be one of: {', '.join(sorted(allowed_strategy_modes))}" raise ValueError(e) ctl.strategy_mode = strategy_mode if "strategy_profit_threshold" in form: diff --git a/src/arbitrade/config/settings.py b/src/arbitrade/config/settings.py index dc9ed4d..11ac3f0 100644 --- a/src/arbitrade/config/settings.py +++ b/src/arbitrade/config/settings.py @@ -32,65 +32,117 @@ class Settings(BaseSettings): ) alerts_enabled: bool = Field(default=True, alias="ALERTS_ENABLED") - alert_min_severity: str = Field(default="warning", alias="ALERT_MIN_SEVERITY") - alert_dedup_seconds: float = Field(default=30.0, alias="ALERT_DEDUP_SECONDS") - alert_on_trade_events: bool = Field(default=True, alias="ALERT_ON_TRADE_EVENTS") - alert_on_error_events: bool = Field(default=True, alias="ALERT_ON_ERROR_EVENTS") - alert_on_threshold_events: bool = Field(default=True, alias="ALERT_ON_THRESHOLD_EVENTS") - alert_on_system_events: bool = Field(default=True, alias="ALERT_ON_SYSTEM_EVENTS") + alert_min_severity: str = Field( + default="warning", alias="ALERT_MIN_SEVERITY") + alert_dedup_seconds: float = Field( + default=30.0, alias="ALERT_DEDUP_SECONDS") + alert_on_trade_events: bool = Field( + default=True, alias="ALERT_ON_TRADE_EVENTS") + alert_on_error_events: bool = Field( + default=True, alias="ALERT_ON_ERROR_EVENTS") + alert_on_threshold_events: bool = Field( + default=True, alias="ALERT_ON_THRESHOLD_EVENTS") + alert_on_system_events: bool = Field( + default=True, alias="ALERT_ON_SYSTEM_EVENTS") - telegram_alerts_enabled: bool = Field(default=False, alias="TELEGRAM_ALERTS_ENABLED") - 
telegram_bot_token: str | None = Field(default=None, alias="TELEGRAM_BOT_TOKEN") - telegram_chat_id: str | None = Field(default=None, alias="TELEGRAM_CHAT_ID") + telegram_alerts_enabled: bool = Field( + default=False, alias="TELEGRAM_ALERTS_ENABLED") + telegram_bot_token: str | None = Field( + default=None, alias="TELEGRAM_BOT_TOKEN") + telegram_chat_id: str | None = Field( + default=None, alias="TELEGRAM_CHAT_ID") - discord_alerts_enabled: bool = Field(default=False, alias="DISCORD_ALERTS_ENABLED") - discord_webhook_url: str | None = Field(default=None, alias="DISCORD_WEBHOOK_URL") + discord_alerts_enabled: bool = Field( + default=False, alias="DISCORD_ALERTS_ENABLED") + discord_webhook_url: str | None = Field( + default=None, alias="DISCORD_WEBHOOK_URL") - email_alerts_enabled: bool = Field(default=False, alias="EMAIL_ALERTS_ENABLED") + email_alerts_enabled: bool = Field( + default=False, alias="EMAIL_ALERTS_ENABLED") email_smtp_host: str | None = Field(default=None, alias="EMAIL_SMTP_HOST") email_smtp_port: int = Field(default=587, alias="EMAIL_SMTP_PORT") - email_smtp_username: str | None = Field(default=None, alias="EMAIL_SMTP_USERNAME") - email_smtp_password: str | None = Field(default=None, alias="EMAIL_SMTP_PASSWORD") - email_alert_from: str | None = Field(default=None, alias="EMAIL_ALERT_FROM") + email_smtp_username: str | None = Field( + default=None, alias="EMAIL_SMTP_USERNAME") + email_smtp_password: str | None = Field( + default=None, alias="EMAIL_SMTP_PASSWORD") + email_alert_from: str | None = Field( + default=None, alias="EMAIL_ALERT_FROM") email_alert_to: str | None = Field(default=None, alias="EMAIL_ALERT_TO") email_smtp_use_tls: bool = Field(default=True, alias="EMAIL_SMTP_USE_TLS") - duckdb_path: Path = Field(default=Path("./data/arbitrade.duckdb"), alias="DUCKDB_PATH") + duckdb_path: Path = Field(default=Path( + "./data/arbitrade.duckdb"), alias="DUCKDB_PATH") - kraken_rest_url: str = Field(default="https://api.kraken.com", 
alias="KRAKEN_REST_URL") - kraken_ws_url: str = Field(default="wss://ws.kraken.com/v2", alias="KRAKEN_WS_URL") + kraken_rest_url: str = Field( + default="https://api.kraken.com", alias="KRAKEN_REST_URL") + kraken_ws_url: str = Field( + default="wss://ws.kraken.com/v2", alias="KRAKEN_WS_URL") kraken_private_rate_limit_seconds: float = Field( default=1.0, alias="KRAKEN_PRIVATE_RATE_LIMIT_SECONDS" ) - kraken_http_timeout_seconds: float = Field(default=10.0, alias="KRAKEN_HTTP_TIMEOUT_SECONDS") - kraken_retry_attempts: int = Field(default=3, alias="KRAKEN_RETRY_ATTEMPTS") + kraken_http_timeout_seconds: float = Field( + default=10.0, alias="KRAKEN_HTTP_TIMEOUT_SECONDS") + kraken_retry_attempts: int = Field( + default=3, alias="KRAKEN_RETRY_ATTEMPTS") kraken_retry_base_delay_seconds: float = Field( default=0.25, alias="KRAKEN_RETRY_BASE_DELAY_SECONDS" ) kraken_api_key: str | None = Field(default=None, alias="KRAKEN_API_KEY") - kraken_api_secret: str | None = Field(default=None, alias="KRAKEN_API_SECRET") + kraken_api_secret: str | None = Field( + default=None, alias="KRAKEN_API_SECRET") kraken_api_key_permissions: str = Field( default="query,trade", alias="KRAKEN_API_KEY_PERMISSIONS", ) - ws_heartbeat_timeout_seconds: float = Field(default=20.0, alias="WS_HEARTBEAT_TIMEOUT_SECONDS") - ws_max_staleness_seconds: float = Field(default=5.0, alias="WS_MAX_STALENESS_SECONDS") + ws_heartbeat_timeout_seconds: float = Field( + default=20.0, alias="WS_HEARTBEAT_TIMEOUT_SECONDS") + ws_max_staleness_seconds: float = Field( + default=5.0, alias="WS_MAX_STALENESS_SECONDS") + strategy_enable_stat_arb_experiment: bool = Field( + default=False, + alias="STRATEGY_ENABLE_STAT_ARB_EXPERIMENT", + ) + strategy_stat_arb_lookback_window: int = Field( + default=120, + alias="STRATEGY_STAT_ARB_LOOKBACK_WINDOW", + ) + strategy_stat_arb_entry_zscore: float = Field( + default=2.0, + alias="STRATEGY_STAT_ARB_ENTRY_ZSCORE", + ) + strategy_stat_arb_exit_zscore: float = Field( + default=0.5, + 
alias="STRATEGY_STAT_ARB_EXIT_ZSCORE", + ) + strategy_stat_arb_max_holding_seconds: float = Field( + default=900.0, + alias="STRATEGY_STAT_ARB_MAX_HOLDING_SECONDS", + ) paper_trading_mode: bool = Field(default=True, alias="PAPER_TRADING_MODE") trade_capital_usd: float = Field(default=100.0, alias="TRADE_CAPITAL_USD") - max_trade_capital_usd: float = Field(default=100.0, alias="MAX_TRADE_CAPITAL_USD") - max_concurrent_trades: int | None = Field(default=None, alias="MAX_CONCURRENT_TRADES") + max_trade_capital_usd: float = Field( + default=100.0, alias="MAX_TRADE_CAPITAL_USD") + max_concurrent_trades: int | None = Field( + default=None, alias="MAX_CONCURRENT_TRADES") max_exposure_per_asset_usd: float | None = Field( default=None, alias="MAX_EXPOSURE_PER_ASSET_USD", ) - quote_balance_asset: str = Field(default="USD", alias="QUOTE_BALANCE_ASSET") - min_order_size_usd: float | None = Field(default=None, alias="MIN_ORDER_SIZE_USD") + quote_balance_asset: str = Field( + default="USD", alias="QUOTE_BALANCE_ASSET") + min_order_size_usd: float | None = Field( + default=None, alias="MIN_ORDER_SIZE_USD") kill_switch_active: bool = Field(default=False, alias="KILL_SWITCH_ACTIVE") - daily_loss_limit_usd: float | None = Field(default=None, alias="DAILY_LOSS_LIMIT_USD") - cumulative_loss_limit_usd: float | None = Field(default=None, alias="CUMULATIVE_LOSS_LIMIT_USD") - max_source_latency_ms: float | None = Field(default=None, alias="MAX_SOURCE_LATENCY_MS") - max_apply_latency_ms: float | None = Field(default=None, alias="MAX_APPLY_LATENCY_MS") - max_consecutive_failures: int | None = Field(default=None, alias="MAX_CONSECUTIVE_FAILURES") + daily_loss_limit_usd: float | None = Field( + default=None, alias="DAILY_LOSS_LIMIT_USD") + cumulative_loss_limit_usd: float | None = Field( + default=None, alias="CUMULATIVE_LOSS_LIMIT_USD") + max_source_latency_ms: float | None = Field( + default=None, alias="MAX_SOURCE_LATENCY_MS") + max_apply_latency_ms: float | None = Field( + default=None, 
alias="MAX_APPLY_LATENCY_MS") + max_consecutive_failures: int | None = Field( + default=None, alias="MAX_CONSECUTIVE_FAILURES") fernet_key: str | None = Field(default=None, alias="FERNET_KEY") @@ -107,7 +159,8 @@ class Settings(BaseSettings): def _validate_log_level(cls, value: str) -> str: normalized = value.strip().upper() if normalized not in {"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}: - raise ValueError("LOG_LEVEL must be one of: DEBUG, INFO, WARNING, ERROR, CRITICAL") + raise ValueError( + "LOG_LEVEL must be one of: DEBUG, INFO, WARNING, ERROR, CRITICAL") return normalized @field_validator("alert_min_severity") @@ -115,16 +168,19 @@ class Settings(BaseSettings): def _validate_alert_severity(cls, value: str) -> str: normalized = value.strip().lower() if normalized not in {"info", "warning", "error", "critical"}: - raise ValueError("ALERT_MIN_SEVERITY must be one of: info, warning, error, critical") + raise ValueError( + "ALERT_MIN_SEVERITY must be one of: info, warning, error, critical") return normalized @model_validator(mode="after") def _validate_security_constraints(self) -> Settings: if bool(self.dashboard_auth_username) ^ bool(self.dashboard_auth_password): - raise ValueError("dashboard auth requires both username and password") + raise ValueError( + "dashboard auth requires both username and password") if bool(self.kraken_api_key) ^ bool(self.kraken_api_secret): - raise ValueError("Kraken API auth requires both API key and secret") + raise ValueError( + "Kraken API auth requires both API key and secret") permissions = { token.strip().lower() @@ -132,13 +188,29 @@ class Settings(BaseSettings): if token.strip() } if permissions and ("query" not in permissions or "trade" not in permissions): - raise ValueError("KRAKEN_API_KEY_PERMISSIONS must include query and trade") + raise ValueError( + "KRAKEN_API_KEY_PERMISSIONS must include query and trade") if "withdraw" in permissions or "withdrawals" in permissions: - raise 
ValueError("KRAKEN_API_KEY_PERMISSIONS must not include withdrawal scope") + raise ValueError( + "KRAKEN_API_KEY_PERMISSIONS must not include withdrawal scope") if self.alert_dedup_seconds < 0.0: raise ValueError("ALERT_DEDUP_SECONDS must be >= 0") + if self.strategy_stat_arb_lookback_window < 2: + raise ValueError("STRATEGY_STAT_ARB_LOOKBACK_WINDOW must be >= 2") + if self.strategy_stat_arb_entry_zscore <= 0.0: + raise ValueError("STRATEGY_STAT_ARB_ENTRY_ZSCORE must be > 0") + if self.strategy_stat_arb_exit_zscore < 0.0: + raise ValueError("STRATEGY_STAT_ARB_EXIT_ZSCORE must be >= 0") + if self.strategy_stat_arb_entry_zscore <= self.strategy_stat_arb_exit_zscore: + raise ValueError( + "STRATEGY_STAT_ARB_ENTRY_ZSCORE must be greater than STRATEGY_STAT_ARB_EXIT_ZSCORE" + ) + if self.strategy_stat_arb_max_holding_seconds <= 0.0: + raise ValueError( + "STRATEGY_STAT_ARB_MAX_HOLDING_SECONDS must be > 0") + return self diff --git a/src/arbitrade/strategy/__init__.py b/src/arbitrade/strategy/__init__.py new file mode 100644 index 0000000..6d1952d --- /dev/null +++ b/src/arbitrade/strategy/__init__.py @@ -0,0 +1,5 @@ +"""Experimental strategy modules.""" + +from arbitrade.strategy.stat_arb import StatArbExperiment, StatArbExperimentConfig, StatArbSignal + +__all__ = ["StatArbExperiment", "StatArbExperimentConfig", "StatArbSignal"] diff --git a/src/arbitrade/strategy/stat_arb.py b/src/arbitrade/strategy/stat_arb.py new file mode 100644 index 0000000..78d574d --- /dev/null +++ b/src/arbitrade/strategy/stat_arb.py @@ -0,0 +1,152 @@ +from __future__ import annotations + +from collections import deque +from dataclasses import dataclass +from datetime import UTC, datetime +from statistics import fmean, pstdev +from typing import Literal + + +@dataclass(frozen=True, slots=True) +class StatArbExperimentConfig: + pair_a: str + pair_b: str + lookback_window: int = 120 + entry_zscore: float = 2.0 + exit_zscore: float = 0.5 + max_holding_seconds: float = 900.0 + + 
+@dataclass(frozen=True, slots=True) +class StatArbSignal: + action: Literal[ + "warmup", + "hold", + "enter_long_spread", + "enter_short_spread", + "exit_position", + ] + observed_at: datetime + spread: float + zscore: float | None + position: Literal["long", "short", "flat"] + + +class StatArbExperiment: + """Simple mean-reversion experiment scaffold behind feature flags.""" + + def __init__(self, config: StatArbExperimentConfig) -> None: + if config.lookback_window < 2: + raise ValueError("lookback_window must be >= 2") + if config.entry_zscore <= 0.0: + raise ValueError("entry_zscore must be > 0") + if config.exit_zscore < 0.0: + raise ValueError("exit_zscore must be >= 0") + if config.entry_zscore <= config.exit_zscore: + raise ValueError("entry_zscore must be > exit_zscore") + if config.max_holding_seconds <= 0.0: + raise ValueError("max_holding_seconds must be > 0") + + self._config = config + self._spreads: deque[float] = deque(maxlen=config.lookback_window) + self._position: Literal["long", "short", "flat"] = "flat" + self._position_opened_at: datetime | None = None + + @property + def config(self) -> StatArbExperimentConfig: + return self._config + + def reset(self) -> None: + self._spreads.clear() + self._position = "flat" + self._position_opened_at = None + + def observe( + self, + *, + price_a: float, + price_b: float, + observed_at: datetime, + ) -> StatArbSignal: + if price_a <= 0.0 or price_b <= 0.0: + raise ValueError("prices must be > 0") + + at = observed_at.astimezone(UTC) + spread = price_a - price_b + self._spreads.append(spread) + + if len(self._spreads) < self._config.lookback_window: + return StatArbSignal( + action="warmup", + observed_at=at, + spread=spread, + zscore=None, + position=self._position, + ) + + mean_spread = fmean(self._spreads) + std_spread = pstdev(self._spreads) + if std_spread == 0.0: + return StatArbSignal( + action="hold", + observed_at=at, + spread=spread, + zscore=0.0, + position=self._position, + ) + + zscore = 
(spread - mean_spread) / std_spread + + if self._position == "flat": + if zscore >= self._config.entry_zscore: + self._position = "short" + self._position_opened_at = at + return StatArbSignal( + action="enter_short_spread", + observed_at=at, + spread=spread, + zscore=zscore, + position=self._position, + ) + if zscore <= -self._config.entry_zscore: + self._position = "long" + self._position_opened_at = at + return StatArbSignal( + action="enter_long_spread", + observed_at=at, + spread=spread, + zscore=zscore, + position=self._position, + ) + return StatArbSignal( + action="hold", + observed_at=at, + spread=spread, + zscore=zscore, + position=self._position, + ) + + assert self._position_opened_at is not None + held_seconds = (at - self._position_opened_at).total_seconds() + should_exit = abs(zscore) <= self._config.exit_zscore + if held_seconds >= self._config.max_holding_seconds: + should_exit = True + + if should_exit: + self._position = "flat" + self._position_opened_at = None + return StatArbSignal( + action="exit_position", + observed_at=at, + spread=spread, + zscore=zscore, + position=self._position, + ) + + return StatArbSignal( + action="hold", + observed_at=at, + spread=spread, + zscore=zscore, + position=self._position, + ) diff --git a/tests/test_dashboard.py b/tests/test_dashboard.py index 17df8b0..e023590 100644 --- a/tests/test_dashboard.py +++ b/tests/test_dashboard.py @@ -191,7 +191,8 @@ async def test_dashboard_page_and_fragment_and_sse(tmp_path) -> None: assert "trade-open" in overview.text assert overview_stream.status_code == 200 - assert overview_stream.headers["content-type"].startswith("text/event-stream") + assert overview_stream.headers["content-type"].startswith( + "text/event-stream") assert "event: overview" in overview_stream.text assert "trade-open" in overview_stream.text @@ -261,7 +262,8 @@ async def test_dashboard_controls_update_runtime_state_and_config(tmp_path) -> N assert app.state.settings.max_trade_capital_usd == 300.0 assert 
app.state.settings.max_concurrent_trades == 4 assert app.state.settings.paper_trading_mode is True - assert app.state.dashboard_controls.tradable_pairs == ["BTC/USD", "ETH/BTC"] + assert app.state.dashboard_controls.tradable_pairs == [ + "BTC/USD", "ETH/BTC"] assert app.state.dashboard_controls.strategy_mode == "paper" assert app.state.dashboard_controls.strategy_profit_threshold == 0.0025 assert app.state.dashboard_controls.strategy_max_depth_levels == 7 @@ -273,10 +275,14 @@ async def test_dashboard_controls_update_runtime_state_and_config(tmp_path) -> N assert audit_recent.status_code == 200 entries = audit_recent.json()["entries"] assert len(entries) >= 4 - assert any(entry["event_type"] == "dashboard.control.stop" for entry in entries) - assert any(entry["event_type"] == "dashboard.control.start" for entry in entries) - assert any(entry["event_type"] == "dashboard.control.kill_switch" for entry in entries) - assert any(entry["event_type"] == "dashboard.control.config" for entry in entries) + assert any(entry["event_type"] == + "dashboard.control.stop" for entry in entries) + assert any(entry["event_type"] == + "dashboard.control.start" for entry in entries) + assert any(entry["event_type"] == + "dashboard.control.kill_switch" for entry in entries) + assert any(entry["event_type"] == + "dashboard.control.config" for entry in entries) async def test_dashboard_controls_emit_alerts(tmp_path) -> None: @@ -333,3 +339,54 @@ async def test_dashboard_alert_status_api_exposes_notifier_snapshot(tmp_path) -> assert payload["enabled"] is True assert "configured_channels" in payload assert "last_result" in payload + + +async def test_backtesting_page_run_and_recent_reports_api(tmp_path) -> None: + app = create_app(Settings(DUCKDB_PATH=tmp_path / "backtesting-ui.duckdb")) + + events_file = tmp_path / "replay.jsonl" + events_file.write_text( + "\n".join( + [ + '{"timestamp":"2026-06-01T12:00:00Z","symbol":"BTC/USD","bids":[[99.5,10.0]],"asks":[[100.0,10.0]]}', + 
'{"timestamp":"2026-06-01T12:00:01Z","symbol":"ETH/BTC","bids":[[0.051,10.0]],"asks":[[0.050,10.0]]}', + '{"timestamp":"2026-06-01T12:00:02Z","symbol":"ETH/USD","bids":[[110.0,10.0]],"asks":[[110.5,10.0]]}', + ] + ), + encoding="utf-8", + ) + + transport = httpx.ASGITransport(app=app) + async with httpx.AsyncClient(transport=transport, base_url="http://test") as client: + page = await client.get("/dashboard/backtesting") + fragment = await client.get("/dashboard/fragment/backtesting") + run = await client.post( + "/dashboard/backtesting/run", + data={ + "events_path": str(events_file), + "starting_balances": "USD=1000.0", + "trade_capital": "100.0", + "min_profit_threshold": "0.0005", + "fee_profile": "standard", + "slippage_bps": "4.0", + "execution_latency_ms": "20.0", + }, + ) + reports = await client.get("/dashboard/api/backtesting/reports") + + assert page.status_code == 200 + assert "Backtesting" in page.text + assert "/dashboard/fragment/backtesting" in page.text + + assert fragment.status_code == 200 + assert "Run Backtest" in fragment.text + assert "Recent Runs" in fragment.text + + assert run.status_code == 200 + assert "completed" in run.text + assert "Processed:" in run.text + + assert reports.status_code == 200 + payload = reports.json() + assert len(payload["reports"]) >= 1 + assert payload["reports"][0]["status"] == "completed" diff --git a/tests/unit/test_settings_validation.py b/tests/unit/test_settings_validation.py index 875ff30..65d1184 100644 --- a/tests/unit/test_settings_validation.py +++ b/tests/unit/test_settings_validation.py @@ -53,3 +53,20 @@ def test_valid_security_configuration_passes() -> None: ) assert settings.kraken_api_key_permissions == "query,trade" + + +def test_stat_arb_entry_zscore_must_exceed_exit_zscore() -> None: + with pytest.raises(ValidationError): + Settings( + _env_file=None, + STRATEGY_STAT_ARB_ENTRY_ZSCORE="0.5", + STRATEGY_STAT_ARB_EXIT_ZSCORE="0.5", + ) + + +def test_stat_arb_lookback_window_must_be_at_least_two() 
-> None: + with pytest.raises(ValidationError): + Settings( + _env_file=None, + STRATEGY_STAT_ARB_LOOKBACK_WINDOW="1", + ) diff --git a/tests/unit/test_stat_arb_experiment.py b/tests/unit/test_stat_arb_experiment.py new file mode 100644 index 0000000..f456f87 --- /dev/null +++ b/tests/unit/test_stat_arb_experiment.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +from datetime import UTC, datetime, timedelta + +from arbitrade.strategy.stat_arb import StatArbExperiment, StatArbExperimentConfig + + +def test_stat_arb_experiment_warmup_then_entry_and_exit() -> None: + started_at = datetime(2026, 6, 2, 12, 0, tzinfo=UTC) + experiment = StatArbExperiment( + StatArbExperimentConfig( + pair_a="BTC/USD", + pair_b="ETH/USD", + lookback_window=5, + entry_zscore=1.5, + exit_zscore=0.2, + max_holding_seconds=0.5, + ) + ) + + # Warmup with nearly stationary spread around 0. + for idx in range(5): + signal = experiment.observe( + price_a=100.0 + (0.02 * idx), + price_b=100.0, + observed_at=started_at + timedelta(seconds=idx), + ) + + assert signal.action in {"warmup", "hold"} + + # Large positive spread should trigger short-spread entry. + entry = experiment.observe( + price_a=104.0, + price_b=100.0, + observed_at=started_at + timedelta(seconds=10), + ) + assert entry.action == "enter_short_spread" + assert entry.position == "short" + assert entry.zscore is not None + + # Mean reversion toward center should trigger exit. 
+ exit_signal = experiment.observe( + price_a=100.05, + price_b=100.0, + observed_at=started_at + timedelta(seconds=11), + ) + assert exit_signal.action == "exit_position" + assert exit_signal.position == "flat" + + +def test_stat_arb_experiment_rejects_invalid_prices() -> None: + experiment = StatArbExperiment( + StatArbExperimentConfig( + pair_a="BTC/USD", + pair_b="ETH/USD", + lookback_window=5, + ) + ) + + at = datetime(2026, 6, 2, 12, 0, tzinfo=UTC) + try: + experiment.observe(price_a=0.0, price_b=100.0, observed_at=at) + except ValueError as exc: + assert "prices must be > 0" in str(exc) + else: + raise AssertionError("Expected ValueError for non-positive price") diff --git a/web/templates/backtesting.html b/web/templates/backtesting.html new file mode 100644 index 0000000..7519e75 --- /dev/null +++ b/web/templates/backtesting.html @@ -0,0 +1,24 @@ +{% extends "base.html" %} {% block title %}{{ title }}{% endblock %} {% block +content %} +
+
+

Backtesting

+

+ Replay controls, run status, and recent summary reports. +

+
+
+ Dashboard +
+
+ +
+ {% include "partials/backtesting_panel.html" %} +
+{% endblock %} diff --git a/web/templates/dashboard.html b/web/templates/dashboard.html index 4f00d09..b6be1fa 100644 --- a/web/templates/dashboard.html +++ b/web/templates/dashboard.html @@ -18,6 +18,7 @@ head_scripts %} >Refresh metrics Health + Backtesting diff --git a/web/templates/partials/backtesting_panel.html b/web/templates/partials/backtesting_panel.html new file mode 100644 index 0000000..15b665d --- /dev/null +++ b/web/templates/partials/backtesting_panel.html @@ -0,0 +1,142 @@ +
+
+
+
Run Status
+
{{ status }}
+
{{ message }}
+
+
+
Latest Report
+ {% if latest_report %} +
Run at {{ latest_report.run_at }}
+
Events: {{ latest_report.events_path }}
+
+ Processed: {{ latest_report.report.processed_events }} +
+
+ Opportunities: {{ latest_report.report.opportunities_seen }} +
+
Trades: {{ latest_report.report.trades_executed }}
+
+ Realized P&L: {{ + '%.4f'|format(latest_report.report.realized_pnl_usd) }} USD +
+
+ Max drawdown: {{ '%.4f'|format(latest_report.report.max_drawdown_usd) }} + USD +
+ {% else %} +
No runs yet.
+ {% endif %} +
+
+ +
+
Run Backtest
+
+ + + + + + + + + +
+
+ +
+
Recent Runs
+ {% if recent_reports %} {% for item in recent_reports %} +
+ {{ item.run_at }} | {{ item.events_path }} | trades={{ + item.report.trades_executed }} | pnl={{ + '%.4f'|format(item.report.realized_pnl_usd) }} USD +
+ {% endfor %} {% else %} +
No recent reports yet.
+ {% endif %} +
+
diff --git a/web/templates/partials/controls.html b/web/templates/partials/controls.html index afe405f..a5f968b 100644 --- a/web/templates/partials/controls.html +++ b/web/templates/partials/controls.html @@ -131,6 +131,12 @@ {% set sel = "selected" if strategy_mode == "live" else "" %} + {% if strategy_stat_arb_enabled %} {% set sel = "selected" if + strategy_mode == "stat_arb_experiment" else "" %} + + {% endif %}