Add OSM Track Harvesting Policy and demo database initialization script
- Updated documentation to include OSM Track Harvesting Policy with details on railway types, service filters, usage filters, and geometry guardrails. - Introduced a new script `init_demo_db.py` to automate the database setup process, including environment checks, running migrations, and loading OSM fixtures for demo data.
This commit is contained in:
@@ -76,17 +76,42 @@ STATION_TAG_FILTERS: Mapping[str, Tuple[str, ...]] = {
|
||||
|
||||
|
||||
# Tags that describe rail infrastructure usable for train routing.
# "disused" and "construction" are accepted alongside the operational types.
TRACK_ALLOWED_RAILWAY_TYPES: Tuple[str, ...] = (
    "rail",
    "light_rail",
    "subway",
    "tram",
    "narrow_gauge",
    "disused",
    "construction",
)

# Tag filters for track ways. The "railway" entry reuses the allow-list above
# (a single key: a duplicated "railway" key would be silently overwritten by
# the later one), so the query and the allow-list cannot drift apart.
TRACK_TAG_FILTERS: Mapping[str, Tuple[str, ...]] = {
    "railway": TRACK_ALLOWED_RAILWAY_TYPES,
}

# Track ingestion policy

# Values of the `service` tag that cause a way to be excluded.
TRACK_EXCLUDED_SERVICE_TAGS: Tuple[str, ...] = (
    "yard",
    "siding",
    "spur",
    "crossover",
    "industrial",
    "military",
)

# Values of the `usage` tag that cause a way to be excluded.
TRACK_EXCLUDED_USAGE_TAGS: Tuple[str, ...] = (
    "military",
    "tourism",
)

# Minimum polyline length, in meters, for a way to be kept.
TRACK_MIN_LENGTH_METERS: float = 75.0

# Maximum distance, in meters, between a track endpoint and the station it is
# matched to.
TRACK_STATION_SNAP_RADIUS_METERS: float = 350.0
|
||||
|
||||
|
||||
def compile_overpass_filters(filters: Mapping[str, Iterable[str]]) -> str:
|
||||
"""Build an Overpass boolean expression that matches the provided filters."""
|
||||
|
||||
@@ -101,6 +126,11 @@ __all__ = [
|
||||
"BoundingBox",
|
||||
"DEFAULT_REGIONS",
|
||||
"STATION_TAG_FILTERS",
|
||||
"TRACK_ALLOWED_RAILWAY_TYPES",
|
||||
"TRACK_TAG_FILTERS",
|
||||
"TRACK_EXCLUDED_SERVICE_TAGS",
|
||||
"TRACK_EXCLUDED_USAGE_TAGS",
|
||||
"TRACK_MIN_LENGTH_METERS",
|
||||
"TRACK_STATION_SNAP_RADIUS_METERS",
|
||||
"compile_overpass_filters",
|
||||
]
|
||||
|
||||
@@ -8,11 +8,15 @@ import math
|
||||
import sys
|
||||
from dataclasses import asdict
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable
|
||||
from typing import Any, Iterable, Mapping
|
||||
from urllib.parse import quote_plus
|
||||
|
||||
from backend.app.core.osm_config import (
|
||||
DEFAULT_REGIONS,
|
||||
TRACK_ALLOWED_RAILWAY_TYPES,
|
||||
TRACK_EXCLUDED_SERVICE_TAGS,
|
||||
TRACK_EXCLUDED_USAGE_TAGS,
|
||||
TRACK_MIN_LENGTH_METERS,
|
||||
TRACK_TAG_FILTERS,
|
||||
compile_overpass_filters,
|
||||
)
|
||||
@@ -104,13 +108,15 @@ def normalize_track_elements(elements: Iterable[dict[str, Any]]) -> list[dict[st
|
||||
continue
|
||||
|
||||
tags: dict[str, Any] = element.get("tags", {})
|
||||
length_meters = _polyline_length(coordinates)
|
||||
if not _should_include_track(tags, length_meters):
|
||||
continue
|
||||
|
||||
name = tags.get("name")
|
||||
maxspeed = _parse_maxspeed(tags.get("maxspeed"))
|
||||
status = _derive_status(tags.get("railway"))
|
||||
is_bidirectional = not _is_oneway(tags.get("oneway"))
|
||||
|
||||
length_meters = _polyline_length(coordinates)
|
||||
|
||||
tracks.append(
|
||||
{
|
||||
"osmId": str(element.get("id")),
|
||||
@@ -156,6 +162,25 @@ def _derive_status(value: Any) -> str:
|
||||
return "operational"
|
||||
|
||||
|
||||
def _should_include_track(tags: Mapping[str, Any], length_meters: float) -> bool:
    """Decide whether a track way passes the ingestion policy.

    A way is rejected when its ``railway`` tag is not in
    ``TRACK_ALLOWED_RAILWAY_TYPES``, when it is shorter than
    ``TRACK_MIN_LENGTH_METERS``, or when its ``service`` / ``usage`` tag is
    listed in the corresponding exclusion tuple. Tag values are compared
    case-insensitively; a missing tag is treated as an empty string.

    Args:
        tags: Tag mapping of the way.
        length_meters: Length of the way's polyline in meters.

    Returns:
        ``True`` if the way should be kept, ``False`` otherwise.
    """

    def _normalized(key: str) -> str:
        # Missing tags become "", anything else is stringified and lower-cased.
        return str(tags.get(key, "")).lower()

    if _normalized("railway") not in TRACK_ALLOWED_RAILWAY_TYPES:
        return False

    if length_meters < TRACK_MIN_LENGTH_METERS:
        return False

    service_value = _normalized("service")
    if service_value and service_value in TRACK_EXCLUDED_SERVICE_TAGS:
        return False

    usage_value = _normalized("usage")
    if usage_value and usage_value in TRACK_EXCLUDED_USAGE_TAGS:
        return False

    return True
|
||||
|
||||
|
||||
def _is_oneway(value: Any) -> bool:
|
||||
if value is None:
|
||||
return False
|
||||
|
||||
@@ -13,6 +13,7 @@ from typing import Any, Iterable, Mapping, Sequence
|
||||
from geoalchemy2.elements import WKBElement, WKTElement
|
||||
from geoalchemy2.shape import to_shape
|
||||
|
||||
from backend.app.core.osm_config import TRACK_STATION_SNAP_RADIUS_METERS
|
||||
from backend.app.db.session import SessionLocal
|
||||
from backend.app.models import TrackCreate
|
||||
from backend.app.repositories import StationRepository, TrackRepository
|
||||
@@ -133,26 +134,40 @@ def load_tracks(tracks: Iterable[ParsedTrack], commit: bool = True) -> int:
|
||||
|
||||
for track_data in tracks:
|
||||
start_station = _nearest_station(
|
||||
track_data.coordinates[0], station_index)
|
||||
track_data.coordinates[0],
|
||||
station_index,
|
||||
TRACK_STATION_SNAP_RADIUS_METERS,
|
||||
)
|
||||
end_station = _nearest_station(
|
||||
track_data.coordinates[-1], station_index)
|
||||
track_data.coordinates[-1],
|
||||
station_index,
|
||||
TRACK_STATION_SNAP_RADIUS_METERS,
|
||||
)
|
||||
|
||||
if not start_station or not end_station:
|
||||
continue
|
||||
|
||||
if start_station.id == end_station.id:
|
||||
continue
|
||||
|
||||
pair = (start_station.id, end_station.id)
|
||||
if pair in existing_pairs:
|
||||
continue
|
||||
|
||||
length = track_data.length_meters or _polyline_length(
|
||||
track_data.coordinates)
|
||||
max_speed = (
|
||||
int(round(track_data.max_speed_kph))
|
||||
if track_data.max_speed_kph is not None
|
||||
else None
|
||||
)
|
||||
create_schema = TrackCreate(
|
||||
name=track_data.name,
|
||||
start_station_id=start_station.id,
|
||||
end_station_id=end_station.id,
|
||||
coordinates=track_data.coordinates,
|
||||
length_meters=length,
|
||||
max_speed_kph=track_data.max_speed_kph,
|
||||
max_speed_kph=max_speed,
|
||||
status=track_data.status,
|
||||
is_bidirectional=track_data.is_bidirectional,
|
||||
)
|
||||
@@ -170,7 +185,9 @@ def load_tracks(tracks: Iterable[ParsedTrack], commit: bool = True) -> int:
|
||||
|
||||
|
||||
def _nearest_station(
|
||||
coordinate: tuple[float, float], stations: Sequence[StationRef]
|
||||
coordinate: tuple[float, float],
|
||||
stations: Sequence[StationRef],
|
||||
max_distance_meters: float,
|
||||
) -> StationRef | None:
|
||||
best_station: StationRef | None = None
|
||||
best_distance = math.inf
|
||||
@@ -180,7 +197,9 @@ def _nearest_station(
|
||||
if distance < best_distance:
|
||||
best_station = station
|
||||
best_distance = distance
|
||||
return best_station
|
||||
if best_distance <= max_distance_meters:
|
||||
return best_station
|
||||
return None
|
||||
|
||||
|
||||
def _build_station_index(stations: Iterable[Any]) -> list[StationRef]:
|
||||
@@ -192,11 +211,15 @@ def _build_station_index(stations: Iterable[Any]) -> list[StationRef]:
|
||||
point = _to_point(location)
|
||||
if point is None:
|
||||
continue
|
||||
latitude = getattr(point, "y", None)
|
||||
longitude = getattr(point, "x", None)
|
||||
if latitude is None or longitude is None:
|
||||
continue
|
||||
index.append(
|
||||
StationRef(
|
||||
id=str(station.id),
|
||||
latitude=float(point.y),
|
||||
longitude=float(point.x),
|
||||
latitude=float(latitude),
|
||||
longitude=float(longitude),
|
||||
)
|
||||
)
|
||||
return index
|
||||
|
||||
@@ -67,3 +67,44 @@ def test_normalize_track_elements_marks_oneway_and_status() -> None:
|
||||
track = tracks[0]
|
||||
assert track["status"] == "disused"
|
||||
assert track["isBidirectional"] is False
|
||||
|
||||
|
||||
def test_normalize_track_elements_skips_service_tracks() -> None:
    """A ``rail`` way tagged ``service=yard`` yields no normalized tracks."""
    yard_way = {
        "type": "way",
        "id": 77,
        "geometry": [
            {"lat": 52.5000, "lon": 13.4000},
            {"lat": 52.5010, "lon": 13.4010},
        ],
        "tags": {
            "railway": "rail",
            "service": "yard",
        },
    }

    normalized = tracks_import.normalize_track_elements([yard_way])

    assert normalized == []
|
||||
|
||||
|
||||
def test_normalize_track_elements_skips_short_tracks() -> None:
    """A ``rail`` way whose two points are only a few meters apart is dropped."""
    short_way = {
        "type": "way",
        "id": 81,
        "geometry": [
            {"lat": 52.500000, "lon": 13.400000},
            {"lat": 52.500100, "lon": 13.400050},
        ],
        "tags": {
            "railway": "rail",
        },
    }

    normalized = tracks_import.normalize_track_elements([short_way])

    assert normalized == []
|
||||
|
||||
@@ -166,3 +166,35 @@ def test_load_tracks_skips_existing_pairs(monkeypatch: pytest.MonkeyPatch) -> No
|
||||
assert created == 0
|
||||
assert session_instance.rolled_back is True
|
||||
assert not track_repo_instance.created
|
||||
|
||||
|
||||
def test_load_tracks_skips_when_station_too_far(monkeypatch: pytest.MonkeyPatch) -> None:
    """No track is created when the only known station is far from the track."""
    session_instance = DummySession()
    station_repo_instance = DummyStationRepository(
        session_instance,
        stations=[
            DummyStation(id="remote-station", location=_point(53.5, 14.5)),
        ],
    )
    track_repo_instance = DummyTrackRepository(session_instance)

    # Swap the loader's collaborators for the in-memory doubles built above.
    replacements = {
        "SessionLocal": lambda: session_instance,
        "StationRepository": lambda session: station_repo_instance,
        "TrackRepository": lambda session: track_repo_instance,
    }
    for attribute, factory in replacements.items():
        monkeypatch.setattr(tracks_load, attribute, factory)

    isolated_entry = {
        "name": "Isolated Segment",
        "coordinates": [[52.5, 13.4], [52.51, 13.41]],
    }
    parsed = tracks_load._parse_track_entries([isolated_entry])

    created = tracks_load.load_tracks(parsed, commit=True)

    assert created == 0
    assert session_instance.committed is True
    assert not track_repo_instance.created
|
||||
|
||||
9782
data/osm_stations.json
Normal file
9782
data/osm_stations.json
Normal file
File diff suppressed because it is too large
Load Diff
527625
data/osm_tracks.json
Normal file
527625
data/osm_tracks.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -55,6 +55,13 @@ Dynamic simulation of train operations:
|
||||
- **Fallback Mechanisms**: Polling as alternative when WebSockets unavailable
|
||||
- **Event-Driven Updates**: Push notifications for game state changes
|
||||
|
||||
#### 8.2.4 OSM Track Harvesting Policy
|
||||
|
||||
- **Railway Types**: Importer requests `rail`, `light_rail`, `subway`, `tram`, and `narrow_gauge`, plus ways tagged `construction` or `disused` so that each track's lifecycle status is captured.
|
||||
- **Service Filters**: `service` tags such as `yard`, `siding`, `spur`, `crossover`, `industrial`, or `military` are excluded to focus on mainline traffic.
|
||||
- **Usage Filters**: Ways flagged with `usage=military` or `usage=tourism` are skipped; unspecified usage defaults to accepted.
|
||||
- **Geometry Guardrails**: Segments shorter than 75 meters are discarded during normalization. During loading, both endpoints of a track must snap to an existing station within 350 meters; otherwise the segment is ignored.
|
||||
|
||||
### 8.3 User Interface Concepts
|
||||
|
||||
#### 8.3.1 Component-Based Architecture
|
||||
@@ -127,4 +134,3 @@ Dynamic simulation of train operations:
|
||||
- **Lazy Loading**: On-demand loading of components and data
|
||||
- **Caching Layers**: Redis for frequently accessed data
|
||||
- **Asset Optimization**: Minification and compression of static resources
|
||||
|
||||
|
||||
155
scripts/init_demo_db.py
Normal file
155
scripts/init_demo_db.py
Normal file
@@ -0,0 +1,155 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Initialize the database with demo data for the Rail Game.
|
||||
|
||||
This script automates the database setup process:
|
||||
1. Validates environment setup
|
||||
2. Runs database migrations
|
||||
3. Loads OSM fixtures for demo data
|
||||
|
||||
Usage:
|
||||
python scripts/init_demo_db.py [--dry-run] [--region REGION]
|
||||
|
||||
Requirements:
|
||||
- Virtual environment activated
|
||||
- .env file configured with DATABASE_URL
|
||||
- PostgreSQL with PostGIS running
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
except ImportError:
|
||||
print("WARNING: python-dotenv not installed. .env file will not be loaded automatically.")
|
||||
print("Install with: pip install python-dotenv")
|
||||
|
||||
|
||||
def check_virtualenv():
    """Exit with status 1 unless the interpreter runs inside a virtual environment.

    A virtual environment is detected either through ``sys.real_prefix`` or
    through ``sys.base_prefix`` differing from ``sys.prefix``.
    """
    has_real_prefix = hasattr(sys, 'real_prefix')
    prefix_differs = hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix
    if has_real_prefix or prefix_differs:
        return

    for line in (
        "ERROR: Virtual environment not activated. Run:",
        " .venv\\Scripts\\Activate.ps1 (PowerShell)",
        " source .venv/bin/activate (Bash/macOS/Linux)",
    ):
        print(line)
    sys.exit(1)
|
||||
|
||||
|
||||
def check_env_file():
    """Exit with status 1 if no ``.env`` file exists in the current working directory."""
    if Path('.env').exists():
        return

    for line in (
        "ERROR: .env file not found. Copy .env.example to .env and configure:",
        " Copy-Item .env.example .env (PowerShell)",
        " cp .env.example .env (Bash)",
    ):
        print(line)
    sys.exit(1)
|
||||
|
||||
|
||||
def _mask_database_url(database_url):
    """Return *database_url* with any embedded password replaced by ``***``."""
    from urllib.parse import urlsplit

    try:
        parts = urlsplit(database_url)
    except ValueError:
        # Never fall back to echoing a value we could not inspect.
        return "<unparseable DATABASE_URL>"

    # rpartition keeps working when the password itself contains an "@".
    userinfo, at_sign, hostinfo = parts.netloc.rpartition("@")
    if not at_sign or ":" not in userinfo:
        return database_url  # no password component to hide

    username = userinfo.split(":", 1)[0]
    return parts._replace(netloc=f"{username}:***@{hostinfo}").geturl()


def check_database_url():
    """Exit with status 1 if ``DATABASE_URL`` is unset or empty; otherwise report it.

    The URL is echoed with its password masked so that credentials do not end
    up in terminal scrollback or CI logs.
    """
    database_url = os.getenv('DATABASE_URL')
    if not database_url:
        print("ERROR: DATABASE_URL not set. Check your .env file.")
        sys.exit(1)
    print(f"Using database: {_mask_database_url(database_url)}")
|
||||
|
||||
|
||||
def run_command(cmd, cwd=None, description=""):
    """Run an external command, echo its output, and abort the script on failure.

    Args:
        cmd: Command and its arguments as a list of strings (run without a shell).
        cwd: Optional working directory for the child process.
        description: Label printed before the command is started.

    Returns:
        The ``subprocess.CompletedProcess`` of the successful run.

    Raises:
        SystemExit: With code 1 if the command cannot be started (missing
            executable or working directory) or exits with a non-zero status.
    """
    print(f"\n>>> {description}")
    print(f"Running: {' '.join(cmd)}")
    try:
        result = subprocess.run(cmd, cwd=cwd, check=True,
                                capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"ERROR: Command failed with exit code {e.returncode}")
        if e.stdout:
            print(e.stdout)
        if e.stderr:
            print(e.stderr)
        sys.exit(1)
    except OSError as e:
        # Raised before the child starts, e.g. executable or cwd not found.
        print(f"ERROR: Could not start command: {e}")
        sys.exit(1)

    if result.stdout:
        print(result.stdout)
    if result.stderr:
        # Captured stderr would otherwise be lost on success; tools may log
        # progress and warnings there.
        print(result.stderr, file=sys.stderr)
    return result
|
||||
|
||||
|
||||
def run_migrations():
    """Upgrade the database schema to the latest revision via ``alembic upgrade head``.

    The command is executed with ``backend`` as its working directory.
    """
    alembic_cmd = ['alembic', 'upgrade', 'head']
    run_command(alembic_cmd, cwd='backend', description="Running database migrations")
|
||||
|
||||
|
||||
def load_osm_fixtures(region, dry_run=False):
    """Load OSM fixtures for demo data by running ``backend.scripts.osm_refresh``.

    Args:
        region: Value passed to the refresh script's ``--region`` option.
        dry_run: When true, ``--no-commit`` is appended to the command.
    """
    # Use the running interpreter rather than whatever "python" resolves to on
    # PATH, so the child uses the same environment that passed the checks.
    cmd = [sys.executable, '-m', 'backend.scripts.osm_refresh', '--region', region]
    if dry_run:
        cmd.append('--no-commit')
        description = f"Loading OSM fixtures (dry run) for region: {region}"
    else:
        description = f"Loading OSM fixtures for region: {region}"

    run_command(cmd, description=description)
|
||||
|
||||
|
||||
def main():
    """Parse the command line, run the pre-flight checks, then migrate and load fixtures.

    Migrations and fixture loading can each be skipped with
    ``--skip-migrations`` / ``--skip-fixtures``; ``--dry-run`` is forwarded to
    the fixture loader.
    """
    parser = argparse.ArgumentParser(
        description="Initialize database with demo data")
    parser.add_argument(
        '--region', default='all', help='OSM region to load (default: all)')
    # The remaining options are plain on/off switches.
    switches = (
        ('--dry-run', 'Dry run: run migrations and load fixtures without committing'),
        ('--skip-migrations', 'Skip running migrations'),
        ('--skip-fixtures', 'Skip loading OSM fixtures'),
    )
    for flag, help_text in switches:
        parser.add_argument(flag, action='store_true', help=help_text)

    options = parser.parse_args()

    print("Rail Game Database Initialization")
    print("=" * 40)

    # Pre-flight checks; each one exits the process on failure.
    for preflight_check in (check_virtualenv, check_env_file, check_database_url):
        preflight_check()

    # Run migrations
    if options.skip_migrations:
        print("Skipping migrations (--skip-migrations)")
    else:
        run_migrations()

    # Load fixtures
    if options.skip_fixtures:
        print("Skipping fixtures (--skip-fixtures)")
    else:
        load_osm_fixtures(options.region, options.dry_run)

    print("\n✅ Database initialization completed successfully!")
    closing_note = (
        "Note: This was a dry run. No data was committed to the database."
        if options.dry_run
        else "Demo data loaded. You can now start the backend server."
    )
    print(closing_note)


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user