Add OSM Track Harvesting Policy and demo database initialization script

- Updated documentation to include OSM Track Harvesting Policy with details on railway types, service filters, usage filters, and geometry guardrails.
- Introduced a new script `init_demo_db.py` to automate the database setup process, including environment checks, running migrations, and loading OSM fixtures for demo data.
This commit is contained in:
2025-10-11 21:37:25 +02:00
parent 0b84ee953e
commit 25ca7ab196
9 changed files with 537737 additions and 18 deletions

View File

@@ -76,17 +76,42 @@ STATION_TAG_FILTERS: Mapping[str, Tuple[str, ...]] = {
# Tags that describe rail infrastructure usable for train routing.
# These are the values of the OSM ``railway`` tag that are accepted for tracks.
# Besides the active line types, "disused" and "construction" are admitted too.
# NOTE(review): presumably those two are kept so such ways can be imported with
# a non-operational status — confirm against the importer's status derivation.
TRACK_ALLOWED_RAILWAY_TYPES: Tuple[str, ...] = (
"rail",
"light_rail",
"subway",
"tram",
"narrow_gauge",
"disused",
"construction",
)
TRACK_TAG_FILTERS: Mapping[str, Tuple[str, ...]] = {
"railway": (
"rail",
"light_rail",
"subway",
"tram",
"narrow_gauge",
),
"railway": TRACK_ALLOWED_RAILWAY_TYPES,
}
# Track ingestion policy
# Values of the OSM ``service`` tag that disqualify a way from being imported
# as a track (compared case-insensitively by the importer's track filter).
TRACK_EXCLUDED_SERVICE_TAGS: Tuple[str, ...] = (
"yard",
"siding",
"spur",
"crossover",
"industrial",
"military",
)
# Values of the OSM ``usage`` tag that disqualify a way in the same manner.
TRACK_EXCLUDED_USAGE_TAGS: Tuple[str, ...] = (
"military",
"tourism",
)
# Geometry guardrail: ways whose computed polyline length is below this many
# metres are dropped during track normalization.
TRACK_MIN_LENGTH_METERS: float = 75.0
# Geometry guardrail: maximum distance, in metres, between a track endpoint and
# its nearest station for the endpoint to be matched to that station. The
# loader skips tracks for which either endpoint has no station in range.
TRACK_STATION_SNAP_RADIUS_METERS: float = 350.0
def compile_overpass_filters(filters: Mapping[str, Iterable[str]]) -> str:
"""Build an Overpass boolean expression that matches the provided filters."""
@@ -101,6 +126,11 @@ __all__ = [
"BoundingBox",
"DEFAULT_REGIONS",
"STATION_TAG_FILTERS",
"TRACK_ALLOWED_RAILWAY_TYPES",
"TRACK_TAG_FILTERS",
"TRACK_EXCLUDED_SERVICE_TAGS",
"TRACK_EXCLUDED_USAGE_TAGS",
"TRACK_MIN_LENGTH_METERS",
"TRACK_STATION_SNAP_RADIUS_METERS",
"compile_overpass_filters",
]

View File

@@ -8,11 +8,15 @@ import math
import sys
from dataclasses import asdict
from pathlib import Path
from typing import Any, Iterable
from typing import Any, Iterable, Mapping
from urllib.parse import quote_plus
from backend.app.core.osm_config import (
DEFAULT_REGIONS,
TRACK_ALLOWED_RAILWAY_TYPES,
TRACK_EXCLUDED_SERVICE_TAGS,
TRACK_EXCLUDED_USAGE_TAGS,
TRACK_MIN_LENGTH_METERS,
TRACK_TAG_FILTERS,
compile_overpass_filters,
)
@@ -104,13 +108,15 @@ def normalize_track_elements(elements: Iterable[dict[str, Any]]) -> list[dict[st
continue
tags: dict[str, Any] = element.get("tags", {})
length_meters = _polyline_length(coordinates)
if not _should_include_track(tags, length_meters):
continue
name = tags.get("name")
maxspeed = _parse_maxspeed(tags.get("maxspeed"))
status = _derive_status(tags.get("railway"))
is_bidirectional = not _is_oneway(tags.get("oneway"))
length_meters = _polyline_length(coordinates)
tracks.append(
{
"osmId": str(element.get("id")),
@@ -156,6 +162,25 @@ def _derive_status(value: Any) -> str:
return "operational"
def _should_include_track(tags: Mapping[str, Any], length_meters: float) -> bool:
    """Return whether a way passes the track ingestion policy.

    A way is accepted only when all of the following hold, checked in order:

    * its ``railway`` tag (lower-cased) is in ``TRACK_ALLOWED_RAILWAY_TYPES``;
    * ``length_meters`` is not below ``TRACK_MIN_LENGTH_METERS``;
    * its ``service`` tag, when non-empty, is not in
      ``TRACK_EXCLUDED_SERVICE_TAGS``;
    * its ``usage`` tag, when non-empty, is not in
      ``TRACK_EXCLUDED_USAGE_TAGS``.

    Args:
        tags: The tag mapping of the OSM element.
        length_meters: The precomputed length of the way's geometry.

    Returns:
        ``True`` if the way should be kept, ``False`` otherwise.
    """

    def _normalized(key: str) -> str:
        # A missing tag becomes the empty string; values are compared
        # case-insensitively via lower-casing.
        return str(tags.get(key, "")).lower()

    if _normalized("railway") not in TRACK_ALLOWED_RAILWAY_TYPES:
        return False

    if length_meters < TRACK_MIN_LENGTH_METERS:
        return False

    exclusion_rules = (
        ("service", TRACK_EXCLUDED_SERVICE_TAGS),
        ("usage", TRACK_EXCLUDED_USAGE_TAGS),
    )
    for tag_key, excluded_values in exclusion_rules:
        tag_value = _normalized(tag_key)
        if tag_value and tag_value in excluded_values:
            return False

    return True
def _is_oneway(value: Any) -> bool:
if value is None:
return False

View File

@@ -13,6 +13,7 @@ from typing import Any, Iterable, Mapping, Sequence
from geoalchemy2.elements import WKBElement, WKTElement
from geoalchemy2.shape import to_shape
from backend.app.core.osm_config import TRACK_STATION_SNAP_RADIUS_METERS
from backend.app.db.session import SessionLocal
from backend.app.models import TrackCreate
from backend.app.repositories import StationRepository, TrackRepository
@@ -133,26 +134,40 @@ def load_tracks(tracks: Iterable[ParsedTrack], commit: bool = True) -> int:
for track_data in tracks:
start_station = _nearest_station(
track_data.coordinates[0], station_index)
track_data.coordinates[0],
station_index,
TRACK_STATION_SNAP_RADIUS_METERS,
)
end_station = _nearest_station(
track_data.coordinates[-1], station_index)
track_data.coordinates[-1],
station_index,
TRACK_STATION_SNAP_RADIUS_METERS,
)
if not start_station or not end_station:
continue
if start_station.id == end_station.id:
continue
pair = (start_station.id, end_station.id)
if pair in existing_pairs:
continue
length = track_data.length_meters or _polyline_length(
track_data.coordinates)
max_speed = (
int(round(track_data.max_speed_kph))
if track_data.max_speed_kph is not None
else None
)
create_schema = TrackCreate(
name=track_data.name,
start_station_id=start_station.id,
end_station_id=end_station.id,
coordinates=track_data.coordinates,
length_meters=length,
max_speed_kph=track_data.max_speed_kph,
max_speed_kph=max_speed,
status=track_data.status,
is_bidirectional=track_data.is_bidirectional,
)
@@ -170,7 +185,9 @@ def load_tracks(tracks: Iterable[ParsedTrack], commit: bool = True) -> int:
def _nearest_station(
coordinate: tuple[float, float], stations: Sequence[StationRef]
coordinate: tuple[float, float],
stations: Sequence[StationRef],
max_distance_meters: float,
) -> StationRef | None:
best_station: StationRef | None = None
best_distance = math.inf
@@ -180,7 +197,9 @@ def _nearest_station(
if distance < best_distance:
best_station = station
best_distance = distance
return best_station
if best_distance <= max_distance_meters:
return best_station
return None
def _build_station_index(stations: Iterable[Any]) -> list[StationRef]:
@@ -192,11 +211,15 @@ def _build_station_index(stations: Iterable[Any]) -> list[StationRef]:
point = _to_point(location)
if point is None:
continue
latitude = getattr(point, "y", None)
longitude = getattr(point, "x", None)
if latitude is None or longitude is None:
continue
index.append(
StationRef(
id=str(station.id),
latitude=float(point.y),
longitude=float(point.x),
latitude=float(latitude),
longitude=float(longitude),
)
)
return index

View File

@@ -67,3 +67,44 @@ def test_normalize_track_elements_marks_oneway_and_status() -> None:
track = tracks[0]
assert track["status"] == "disused"
assert track["isBidirectional"] is False
def test_normalize_track_elements_skips_service_tracks() -> None:
    """A ``railway=rail`` way tagged ``service=yard`` yields no tracks."""
    yard_way = {
        "type": "way",
        "id": 77,
        "geometry": [
            {"lat": 52.5000, "lon": 13.4000},
            {"lat": 52.5010, "lon": 13.4010},
        ],
        "tags": {
            "railway": "rail",
            "service": "yard",
        },
    }

    normalized = tracks_import.normalize_track_elements([yard_way])

    assert normalized == []
def test_normalize_track_elements_skips_short_tracks() -> None:
    """A ``railway=rail`` way with two nearly coincident points yields no tracks."""
    # The two points differ by only 0.0001 deg latitude and 0.00005 deg longitude.
    short_way = {
        "type": "way",
        "id": 81,
        "geometry": [
            {"lat": 52.500000, "lon": 13.400000},
            {"lat": 52.500100, "lon": 13.400050},
        ],
        "tags": {
            "railway": "rail",
        },
    }

    normalized = tracks_import.normalize_track_elements([short_way])

    assert normalized == []

View File

@@ -166,3 +166,35 @@ def test_load_tracks_skips_existing_pairs(monkeypatch: pytest.MonkeyPatch) -> No
assert created == 0
assert session_instance.rolled_back is True
assert not track_repo_instance.created
def test_load_tracks_skips_when_station_too_far(monkeypatch: pytest.MonkeyPatch) -> None:
    """No track is created when the only known station is about a degree away."""
    session = DummySession()
    # The sole station sits at (53.5, 14.5); the track runs near (52.5, 13.4).
    remote_station = DummyStation(id="remote-station", location=_point(53.5, 14.5))
    station_repo = DummyStationRepository(session, stations=[remote_station])
    track_repo = DummyTrackRepository(session)

    # Route the loader's session and repository factories to the test doubles.
    monkeypatch.setattr(tracks_load, "SessionLocal", lambda: session)
    monkeypatch.setattr(tracks_load, "StationRepository", lambda session: station_repo)
    monkeypatch.setattr(tracks_load, "TrackRepository", lambda session: track_repo)

    isolated_entry = {
        "name": "Isolated Segment",
        "coordinates": [[52.5, 13.4], [52.51, 13.41]],
    }
    parsed_tracks = tracks_load._parse_track_entries([isolated_entry])

    created_count = tracks_load.load_tracks(parsed_tracks, commit=True)

    assert created_count == 0
    assert session.committed is True
    assert not track_repo.created