Add OSM Track Harvesting Policy and demo database initialization script

- Updated documentation to include OSM Track Harvesting Policy with details on railway types, service filters, usage filters, and geometry guardrails.
- Introduced a new script `init_demo_db.py` to automate the database setup process, including environment checks, running migrations, and loading OSM fixtures for demo data.
This commit is contained in:
2025-10-11 21:37:25 +02:00
parent 0b84ee953e
commit 25ca7ab196
9 changed files with 537737 additions and 18 deletions

View File

@@ -76,17 +76,42 @@ STATION_TAG_FILTERS: Mapping[str, Tuple[str, ...]] = {
# Tags that describe rail infrastructure usable for train routing. # Tags that describe rail infrastructure usable for train routing.
TRACK_TAG_FILTERS: Mapping[str, Tuple[str, ...]] = { TRACK_ALLOWED_RAILWAY_TYPES: Tuple[str, ...] = (
"railway": (
"rail", "rail",
"light_rail", "light_rail",
"subway", "subway",
"tram", "tram",
"narrow_gauge", "narrow_gauge",
), "disused",
"construction",
)
TRACK_TAG_FILTERS: Mapping[str, Tuple[str, ...]] = {
"railway": TRACK_ALLOWED_RAILWAY_TYPES,
} }
# Track ingestion policy
# OSM ``service=*`` values that mark non-mainline infrastructure; ways
# carrying one of these tags are skipped during import.
TRACK_EXCLUDED_SERVICE_TAGS: Tuple[str, ...] = (
    "yard",
    "siding",
    "spur",
    "crossover",
    "industrial",
    "military",
)
# OSM ``usage=*`` values excluded from import; ways with no usage tag
# are accepted.
TRACK_EXCLUDED_USAGE_TAGS: Tuple[str, ...] = (
    "military",
    "tourism",
)
# Segments shorter than this are discarded as geometric noise.
TRACK_MIN_LENGTH_METERS: float = 75.0
# Maximum distance for snapping a track endpoint to an existing station;
# segments whose endpoints cannot snap within this radius are ignored.
TRACK_STATION_SNAP_RADIUS_METERS: float = 350.0
def compile_overpass_filters(filters: Mapping[str, Iterable[str]]) -> str: def compile_overpass_filters(filters: Mapping[str, Iterable[str]]) -> str:
"""Build an Overpass boolean expression that matches the provided filters.""" """Build an Overpass boolean expression that matches the provided filters."""
@@ -101,6 +126,11 @@ __all__ = [
"BoundingBox", "BoundingBox",
"DEFAULT_REGIONS", "DEFAULT_REGIONS",
"STATION_TAG_FILTERS", "STATION_TAG_FILTERS",
"TRACK_ALLOWED_RAILWAY_TYPES",
"TRACK_TAG_FILTERS", "TRACK_TAG_FILTERS",
"TRACK_EXCLUDED_SERVICE_TAGS",
"TRACK_EXCLUDED_USAGE_TAGS",
"TRACK_MIN_LENGTH_METERS",
"TRACK_STATION_SNAP_RADIUS_METERS",
"compile_overpass_filters", "compile_overpass_filters",
] ]

View File

@@ -8,11 +8,15 @@ import math
import sys import sys
from dataclasses import asdict from dataclasses import asdict
from pathlib import Path from pathlib import Path
from typing import Any, Iterable from typing import Any, Iterable, Mapping
from urllib.parse import quote_plus from urllib.parse import quote_plus
from backend.app.core.osm_config import ( from backend.app.core.osm_config import (
DEFAULT_REGIONS, DEFAULT_REGIONS,
TRACK_ALLOWED_RAILWAY_TYPES,
TRACK_EXCLUDED_SERVICE_TAGS,
TRACK_EXCLUDED_USAGE_TAGS,
TRACK_MIN_LENGTH_METERS,
TRACK_TAG_FILTERS, TRACK_TAG_FILTERS,
compile_overpass_filters, compile_overpass_filters,
) )
@@ -104,13 +108,15 @@ def normalize_track_elements(elements: Iterable[dict[str, Any]]) -> list[dict[st
continue continue
tags: dict[str, Any] = element.get("tags", {}) tags: dict[str, Any] = element.get("tags", {})
length_meters = _polyline_length(coordinates)
if not _should_include_track(tags, length_meters):
continue
name = tags.get("name") name = tags.get("name")
maxspeed = _parse_maxspeed(tags.get("maxspeed")) maxspeed = _parse_maxspeed(tags.get("maxspeed"))
status = _derive_status(tags.get("railway")) status = _derive_status(tags.get("railway"))
is_bidirectional = not _is_oneway(tags.get("oneway")) is_bidirectional = not _is_oneway(tags.get("oneway"))
length_meters = _polyline_length(coordinates)
tracks.append( tracks.append(
{ {
"osmId": str(element.get("id")), "osmId": str(element.get("id")),
@@ -156,6 +162,25 @@ def _derive_status(value: Any) -> str:
return "operational" return "operational"
def _should_include_track(tags: Mapping[str, Any], length_meters: float) -> bool:
    """Return True when an OSM way passes the track ingestion policy.

    A way is kept only when its railway type is allowed, it meets the
    minimum length, and neither its ``service`` nor ``usage`` tag is on
    an exclusion list (absent tags normalize to "" and are accepted).
    """
    if length_meters < TRACK_MIN_LENGTH_METERS:
        return False
    railway_type = str(tags.get("railway", "")).lower()
    service_tag = str(tags.get("service", "")).lower()
    usage_tag = str(tags.get("usage", "")).lower()
    return (
        railway_type in TRACK_ALLOWED_RAILWAY_TYPES
        and service_tag not in TRACK_EXCLUDED_SERVICE_TAGS
        and usage_tag not in TRACK_EXCLUDED_USAGE_TAGS
    )
def _is_oneway(value: Any) -> bool: def _is_oneway(value: Any) -> bool:
if value is None: if value is None:
return False return False

View File

@@ -13,6 +13,7 @@ from typing import Any, Iterable, Mapping, Sequence
from geoalchemy2.elements import WKBElement, WKTElement from geoalchemy2.elements import WKBElement, WKTElement
from geoalchemy2.shape import to_shape from geoalchemy2.shape import to_shape
from backend.app.core.osm_config import TRACK_STATION_SNAP_RADIUS_METERS
from backend.app.db.session import SessionLocal from backend.app.db.session import SessionLocal
from backend.app.models import TrackCreate from backend.app.models import TrackCreate
from backend.app.repositories import StationRepository, TrackRepository from backend.app.repositories import StationRepository, TrackRepository
@@ -133,26 +134,40 @@ def load_tracks(tracks: Iterable[ParsedTrack], commit: bool = True) -> int:
for track_data in tracks: for track_data in tracks:
start_station = _nearest_station( start_station = _nearest_station(
track_data.coordinates[0], station_index) track_data.coordinates[0],
station_index,
TRACK_STATION_SNAP_RADIUS_METERS,
)
end_station = _nearest_station( end_station = _nearest_station(
track_data.coordinates[-1], station_index) track_data.coordinates[-1],
station_index,
TRACK_STATION_SNAP_RADIUS_METERS,
)
if not start_station or not end_station: if not start_station or not end_station:
continue continue
if start_station.id == end_station.id:
continue
pair = (start_station.id, end_station.id) pair = (start_station.id, end_station.id)
if pair in existing_pairs: if pair in existing_pairs:
continue continue
length = track_data.length_meters or _polyline_length( length = track_data.length_meters or _polyline_length(
track_data.coordinates) track_data.coordinates)
max_speed = (
int(round(track_data.max_speed_kph))
if track_data.max_speed_kph is not None
else None
)
create_schema = TrackCreate( create_schema = TrackCreate(
name=track_data.name, name=track_data.name,
start_station_id=start_station.id, start_station_id=start_station.id,
end_station_id=end_station.id, end_station_id=end_station.id,
coordinates=track_data.coordinates, coordinates=track_data.coordinates,
length_meters=length, length_meters=length,
max_speed_kph=track_data.max_speed_kph, max_speed_kph=max_speed,
status=track_data.status, status=track_data.status,
is_bidirectional=track_data.is_bidirectional, is_bidirectional=track_data.is_bidirectional,
) )
@@ -170,7 +185,9 @@ def load_tracks(tracks: Iterable[ParsedTrack], commit: bool = True) -> int:
def _nearest_station( def _nearest_station(
coordinate: tuple[float, float], stations: Sequence[StationRef] coordinate: tuple[float, float],
stations: Sequence[StationRef],
max_distance_meters: float,
) -> StationRef | None: ) -> StationRef | None:
best_station: StationRef | None = None best_station: StationRef | None = None
best_distance = math.inf best_distance = math.inf
@@ -180,7 +197,9 @@ def _nearest_station(
if distance < best_distance: if distance < best_distance:
best_station = station best_station = station
best_distance = distance best_distance = distance
if best_distance <= max_distance_meters:
return best_station return best_station
return None
def _build_station_index(stations: Iterable[Any]) -> list[StationRef]: def _build_station_index(stations: Iterable[Any]) -> list[StationRef]:
@@ -192,11 +211,15 @@ def _build_station_index(stations: Iterable[Any]) -> list[StationRef]:
point = _to_point(location) point = _to_point(location)
if point is None: if point is None:
continue continue
latitude = getattr(point, "y", None)
longitude = getattr(point, "x", None)
if latitude is None or longitude is None:
continue
index.append( index.append(
StationRef( StationRef(
id=str(station.id), id=str(station.id),
latitude=float(point.y), latitude=float(latitude),
longitude=float(point.x), longitude=float(longitude),
) )
) )
return index return index

View File

@@ -67,3 +67,44 @@ def test_normalize_track_elements_marks_oneway_and_status() -> None:
track = tracks[0] track = tracks[0]
assert track["status"] == "disused" assert track["status"] == "disused"
assert track["isBidirectional"] is False assert track["isBidirectional"] is False
def test_normalize_track_elements_skips_service_tracks() -> None:
    """Ways tagged with an excluded service value (yard) are dropped."""
    yard_way = {
        "type": "way",
        "id": 77,
        "geometry": [
            {"lat": 52.5000, "lon": 13.4000},
            {"lat": 52.5010, "lon": 13.4010},
        ],
        "tags": {"railway": "rail", "service": "yard"},
    }
    assert tracks_import.normalize_track_elements([yard_way]) == []
def test_normalize_track_elements_skips_short_tracks() -> None:
    """Segments below the minimum length threshold are filtered out."""
    short_way = {
        "type": "way",
        "id": 81,
        "geometry": [
            {"lat": 52.500000, "lon": 13.400000},
            {"lat": 52.500100, "lon": 13.400050},
        ],
        "tags": {"railway": "rail"},
    }
    assert tracks_import.normalize_track_elements([short_way]) == []

View File

@@ -166,3 +166,35 @@ def test_load_tracks_skips_existing_pairs(monkeypatch: pytest.MonkeyPatch) -> No
assert created == 0 assert created == 0
assert session_instance.rolled_back is True assert session_instance.rolled_back is True
assert not track_repo_instance.created assert not track_repo_instance.created
def test_load_tracks_skips_when_station_too_far(monkeypatch: pytest.MonkeyPatch) -> None:
    """Tracks whose endpoints cannot snap to a nearby station are not created."""
    session_instance = DummySession()
    far_station = DummyStation(id="remote-station", location=_point(53.5, 14.5))
    station_repo_instance = DummyStationRepository(
        session_instance, stations=[far_station])
    track_repo_instance = DummyTrackRepository(session_instance)

    monkeypatch.setattr(tracks_load, "SessionLocal", lambda: session_instance)
    monkeypatch.setattr(
        tracks_load, "StationRepository", lambda session: station_repo_instance)
    monkeypatch.setattr(
        tracks_load, "TrackRepository", lambda session: track_repo_instance)

    payload = [{"name": "Isolated Segment",
                "coordinates": [[52.5, 13.4], [52.51, 13.41]]}]
    parsed = tracks_load._parse_track_entries(payload)

    created = tracks_load.load_tracks(parsed, commit=True)

    # The distant station is beyond the snap radius, so no track is
    # persisted, but the (empty) transaction still commits cleanly.
    assert created == 0
    assert session_instance.committed is True
    assert not track_repo_instance.created

9782
data/osm_stations.json Normal file

File diff suppressed because it is too large Load Diff

527625
data/osm_tracks.json Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -55,6 +55,13 @@ Dynamic simulation of train operations:
- **Fallback Mechanisms**: Polling as alternative when WebSockets unavailable - **Fallback Mechanisms**: Polling as alternative when WebSockets unavailable
- **Event-Driven Updates**: Push notifications for game state changes - **Event-Driven Updates**: Push notifications for game state changes
#### 8.2.4 OSM Track Harvesting Policy
- **Railway Types**: Importer requests `rail`, `light_rail`, `subway`, `tram`, `narrow_gauge`, plus `construction` and `disused` variants to capture build-state metadata.
- **Service Filters**: `service` tags such as `yard`, `siding`, `spur`, `crossover`, `industrial`, or `military` are excluded to focus on mainline traffic.
- **Usage Filters**: Ways flagged with `usage=military` or `usage=tourism` are skipped; unspecified usage defaults to accepted.
- **Geometry Guardrails**: Segments shorter than 75 meters are discarded and track endpoints must snap to an existing station within 350 meters or the segment is ignored during loading.
### 8.3 User Interface Concepts ### 8.3 User Interface Concepts
#### 8.3.1 Component-Based Architecture #### 8.3.1 Component-Based Architecture
@@ -127,4 +134,3 @@ Dynamic simulation of train operations:
- **Lazy Loading**: On-demand loading of components and data - **Lazy Loading**: On-demand loading of components and data
- **Caching Layers**: Redis for frequently accessed data - **Caching Layers**: Redis for frequently accessed data
- **Asset Optimization**: Minification and compression of static resources - **Asset Optimization**: Minification and compression of static resources

155
scripts/init_demo_db.py Normal file
View File

@@ -0,0 +1,155 @@
#!/usr/bin/env python3
"""
Initialize the database with demo data for the Rail Game.
This script automates the database setup process:
1. Validates environment setup
2. Runs database migrations
3. Loads OSM fixtures for demo data
Usage:
python scripts/init_demo_db.py [--dry-run] [--region REGION]
Requirements:
- Virtual environment activated
- .env file configured with DATABASE_URL
- PostgreSQL with PostGIS running
"""
import argparse
import os
import re
import subprocess
import sys
from pathlib import Path
# Load variables from a local .env file when python-dotenv is available.
# The script still runs without it, but DATABASE_URL must then already be
# present in the process environment.
try:
    from dotenv import load_dotenv
    load_dotenv()
except ImportError:
    print("WARNING: python-dotenv not installed. .env file will not be loaded automatically.")
    print("Install with: pip install python-dotenv")
def check_virtualenv():
    """Exit with activation instructions unless a virtualenv is active.

    Detects both the legacy virtualenv marker (``sys.real_prefix``) and
    the modern venv convention (``sys.base_prefix`` differing from
    ``sys.prefix``).
    """
    legacy_venv = hasattr(sys, 'real_prefix')
    modern_venv = hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix
    if not legacy_venv and not modern_venv:
        print("ERROR: Virtual environment not activated. Run:")
        print(" .venv\\Scripts\\Activate.ps1 (PowerShell)")
        print(" source .venv/bin/activate (Bash/macOS/Linux)")
        sys.exit(1)
def check_env_file():
    """Exit with copy instructions when no .env file exists in the CWD."""
    if Path('.env').exists():
        return
    print("ERROR: .env file not found. Copy .env.example to .env and configure:")
    print(" Copy-Item .env.example .env (PowerShell)")
    print(" cp .env.example .env (Bash)")
    sys.exit(1)
def check_database_url():
    """Ensure DATABASE_URL is set in the environment; exit otherwise.

    The URL is echoed for operator feedback, but any password component
    is masked first so credentials do not leak into console or CI logs
    (the original printed the raw URL, password included).
    """
    database_url = os.getenv('DATABASE_URL')
    if not database_url:
        print("ERROR: DATABASE_URL not set. Check your .env file.")
        sys.exit(1)
    # scheme://user:secret@host -> scheme://user:***@host
    masked_url = re.sub(r'(://[^:/@]+):[^@]+@', r'\1:***@', database_url)
    print(f"Using database: {masked_url}")
def run_command(cmd, cwd=None, description=""):
    """Execute *cmd*, echo its output, and return the completed process.

    On a non-zero exit status the captured stdout/stderr are printed and
    the whole script terminates with exit code 1.
    """
    print(f"\n>>> {description}")
    print(f"Running: {' '.join(cmd)}")
    try:
        completed = subprocess.run(cmd, cwd=cwd, check=True,
                                   capture_output=True, text=True)
    except subprocess.CalledProcessError as exc:
        print(f"ERROR: Command failed with exit code {exc.returncode}")
        for stream in (exc.stdout, exc.stderr):
            if stream:
                print(stream)
        sys.exit(1)
    if completed.stdout:
        print(completed.stdout)
    return completed
def run_migrations():
    """Apply all pending Alembic migrations, run from the backend directory."""
    run_command(
        ['alembic', 'upgrade', 'head'],
        cwd='backend',
        description="Running database migrations"
    )
def load_osm_fixtures(region, dry_run=False):
    """Load OSM fixture data for *region* via the osm_refresh module.

    Args:
        region: Region identifier forwarded to ``--region``.
        dry_run: When true, pass ``--no-commit`` so nothing is persisted.

    Uses ``sys.executable`` instead of a bare ``'python'`` so the child
    process runs under the same interpreter (and virtualenv) as this
    script — a PATH lookup of ``python`` may resolve to a different or
    missing interpreter.
    """
    cmd = [sys.executable, '-m', 'backend.scripts.osm_refresh', '--region', region]
    if dry_run:
        cmd.append('--no-commit')
        description = f"Loading OSM fixtures (dry run) for region: {region}"
    else:
        description = f"Loading OSM fixtures for region: {region}"
    run_command(cmd, description=description)
def main():
    """CLI entry point: validate the environment, migrate, load fixtures."""
    parser = argparse.ArgumentParser(
        description="Initialize database with demo data")
    parser.add_argument(
        '--region',
        default='all',
        help='OSM region to load (default: all)',
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Dry run: run migrations and load fixtures without committing',
    )
    parser.add_argument(
        '--skip-migrations',
        action='store_true',
        help='Skip running migrations',
    )
    parser.add_argument(
        '--skip-fixtures',
        action='store_true',
        help='Skip loading OSM fixtures',
    )
    options = parser.parse_args()

    print("Rail Game Database Initialization")
    print("=" * 40)

    # Fail fast before touching the database.
    check_virtualenv()
    check_env_file()
    check_database_url()

    if options.skip_migrations:
        print("Skipping migrations (--skip-migrations)")
    else:
        run_migrations()

    if options.skip_fixtures:
        print("Skipping fixtures (--skip-fixtures)")
    else:
        load_osm_fixtures(options.region, options.dry_run)

    print("\n✅ Database initialization completed successfully!")
    if options.dry_run:
        print("Note: This was a dry run. No data was committed to the database.")
    else:
        print("Demo data loaded. You can now start the backend server.")
# Script entry point.
if __name__ == '__main__':
    main()