feat: Enhance project and scenario creation with monitoring metrics
Some checks failed
CI / lint (push) Failing after 1m14s
CI / test (push) Has been skipped
CI / build (push) Has been skipped

- Added monitoring metrics for project creation success and error handling in `ProjectRepository`.
- Implemented similar monitoring for scenario creation in `ScenarioRepository`.
- Refactored `run_monte_carlo` function in `simulation.py` to include timing and success/error metrics.
- Introduced new CSS styles for headers, alerts, and navigation buttons in `main.css` and `projects.css`.
- Created a new JavaScript file for navigation logic to handle chevron buttons.
- Updated HTML templates to include new navigation buttons and improved styling for buttons.
- Added tests for reporting service and routes to ensure proper functionality and access control.
- Removed unused imports and optimized existing test files for better clarity and performance.
This commit is contained in:
2025-11-12 10:36:24 +01:00
parent f68321cd04
commit ce9c174b53
61 changed files with 2124 additions and 308 deletions

View File

@@ -1,7 +1,14 @@
from __future__ import annotations
from fastapi import APIRouter, Response
from datetime import datetime, timedelta
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query, Response
from prometheus_client import CONTENT_TYPE_LATEST, generate_latest
from sqlalchemy.orm import Session
from config.database import get_db
from services.metrics import MetricsService
router = APIRouter(prefix="/metrics", tags=["monitoring"])
@@ -11,3 +18,99 @@ router = APIRouter(prefix="/metrics", tags=["monitoring"])
async def metrics_endpoint() -> Response:
payload = generate_latest()
return Response(content=payload, media_type=CONTENT_TYPE_LATEST)
@router.get("/performance", summary="Get performance metrics")
async def get_performance_metrics(
metric_name: Optional[str] = Query(
None, description="Filter by metric name"),
hours: int = Query(24, description="Hours back to look"),
db: Session = Depends(get_db),
) -> dict:
"""Get aggregated performance metrics."""
service = MetricsService(db)
start_time = datetime.utcnow() - timedelta(hours=hours)
if metric_name:
metrics = service.get_metrics(
metric_name=metric_name, start_time=start_time)
aggregated = service.get_aggregated_metrics(
metric_name, start_time=start_time)
return {
"metric_name": metric_name,
"period_hours": hours,
"aggregated": aggregated,
"recent_samples": [
{
"timestamp": m.timestamp.isoformat(),
"value": m.value,
"labels": m.labels,
"endpoint": m.endpoint,
"method": m.method,
"status_code": m.status_code,
"duration_seconds": m.duration_seconds,
}
for m in metrics[:50] # Last 50 samples
],
}
# Return summary for all metrics
all_metrics = service.get_metrics(start_time=start_time, limit=1000)
metric_types = {}
for m in all_metrics:
if m.metric_name not in metric_types:
metric_types[m.metric_name] = []
metric_types[m.metric_name].append(m.value)
summary = {}
for name, values in metric_types.items():
summary[name] = {
"count": len(values),
"avg": sum(values) / len(values) if values else 0,
"min": min(values) if values else 0,
"max": max(values) if values else 0,
}
return {
"period_hours": hours,
"summary": summary,
}
@router.get("/health", summary="Detailed health check with metrics")
async def detailed_health(db: Session = Depends(get_db)) -> dict:
"""Get detailed health status with recent metrics."""
service = MetricsService(db)
last_hour = datetime.utcnow() - timedelta(hours=1)
# Get request metrics from last hour
request_metrics = service.get_metrics(
metric_name="http_request", start_time=last_hour
)
if request_metrics:
durations = []
error_count = 0
for m in request_metrics:
if m.duration_seconds is not None:
durations.append(m.duration_seconds)
if m.status_code is not None and m.status_code >= 400:
error_count += 1
total_requests = len(request_metrics)
avg_duration = sum(durations) / len(durations) if durations else 0
error_rate = error_count / total_requests if total_requests > 0 else 0
else:
avg_duration = 0
error_rate = 0
total_requests = 0
return {
"status": "ok",
"timestamp": datetime.utcnow().isoformat(),
"metrics": {
"requests_last_hour": total_requests,
"avg_response_time_seconds": avg_duration,
"error_rate": error_rate,
},
}