feat: implement import functionality for projects and scenarios with CSV/XLSX support, including validation and error handling
This commit is contained in:
237
tests/test_import_ingestion.py
Normal file
237
tests/test_import_ingestion.py
Normal file
@@ -0,0 +1,237 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from io import BytesIO
|
||||
from typing import Callable
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from models.project import MiningOperationType, Project
|
||||
from models.scenario import Scenario, ScenarioStatus
|
||||
from services.importers import (
|
||||
ImportIngestionService,
|
||||
ImportPreviewState,
|
||||
StagedImportView,
|
||||
)
|
||||
from services.unit_of_work import UnitOfWork
|
||||
|
||||
|
||||
@pytest.fixture()
def ingestion_service(unit_of_work_factory: Callable[[], UnitOfWork]) -> ImportIngestionService:
    """Build an ImportIngestionService backed by the test unit-of-work factory."""
    return ImportIngestionService(unit_of_work_factory)
||||
|
||||
|
||||
def test_preview_projects_flags_updates_and_duplicates(
    ingestion_service: ImportIngestionService,
    unit_of_work_factory: Callable[[], UnitOfWork],
) -> None:
    """Preview marks an existing name UPDATE, a fresh name NEW, and an in-file duplicate SKIP."""
    # Seed one project so the first CSV row collides with it by name.
    with unit_of_work_factory() as uow:
        assert uow.projects is not None
        seeded = Project(
            name="Project A",
            location="Chile",
            operation_type=MiningOperationType.OPEN_PIT,
        )
        uow.projects.create(seeded)

    payload = (
        "name,location,operation_type\n"
        "Project A,Peru,open pit\n"
        "Project B,Canada,underground\n"
        "Project B,Canada,underground\n"
    )
    upload = BytesIO(payload.encode("utf-8"))

    preview = ingestion_service.preview_projects(upload, "projects.csv")

    assert [row.state for row in preview.rows] == [
        ImportPreviewState.UPDATE,
        ImportPreviewState.NEW,
        ImportPreviewState.SKIP,
    ]
    summary = preview.summary
    assert summary.total_rows == 3
    assert summary.accepted == 2
    assert summary.skipped == 1
    assert summary.errored == 0
    assert preview.parser_errors == []
    assert preview.stage_token is not None

    # Issue bundles are keyed by spreadsheet row number; row 2 is the first data row.
    issues_by_row = {bundle.row_number: bundle for bundle in preview.row_issues}
    assert 2 in issues_by_row
    assert issues_by_row[2].state == ImportPreviewState.UPDATE
    messages = {detail.message for detail in issues_by_row[2].issues}
    assert messages == {"Existing project will be updated."}
    assert 4 in issues_by_row
    assert issues_by_row[4].state == ImportPreviewState.SKIP
    assert any(
        "Duplicate project name" in detail.message
        for detail in issues_by_row[4].issues
    )

    # Only the accepted rows (UPDATE + NEW) end up staged under the token.
    staged = ingestion_service._project_stage[preview.stage_token]  # type: ignore[attr-defined]
    assert len(staged.rows) == 2
    update_context = preview.rows[0].context
    assert update_context is not None
    assert update_context.get("project_id") is not None
|
||||
|
||||
def test_preview_scenarios_validates_projects_and_updates(
    ingestion_service: ImportIngestionService,
    unit_of_work_factory: Callable[[], UnitOfWork],
) -> None:
    """Scenario preview yields UPDATE / NEW / ERROR (unknown project) / SKIP (duplicate) states."""
    # Seed a project plus one scenario so the first row is an update target.
    with unit_of_work_factory() as uow:
        assert uow.projects is not None and uow.scenarios is not None
        project = Project(
            name="Existing Project",
            location="Chile",
            operation_type=MiningOperationType.OPEN_PIT,
        )
        uow.projects.create(project)
        scenario = Scenario(
            project_id=project.id,
            name="Existing Scenario",
            status=ScenarioStatus.ACTIVE,
        )
        uow.scenarios.create(scenario)

    records = [
        {"project_name": "Existing Project", "name": "Existing Scenario", "status": "Active"},
        {"project_name": "Existing Project", "name": "New Scenario", "status": "Draft"},
        {"project_name": "Missing Project", "name": "Ghost Scenario", "status": "Draft"},
        {"project_name": "Existing Project", "name": "New Scenario", "status": "Draft"},
    ]
    buffer = BytesIO()
    pd.DataFrame(records).to_csv(buffer, index=False)
    buffer.seek(0)

    preview = ingestion_service.preview_scenarios(buffer, "scenarios.csv")

    assert [row.state for row in preview.rows] == [
        ImportPreviewState.UPDATE,
        ImportPreviewState.NEW,
        ImportPreviewState.ERROR,
        ImportPreviewState.SKIP,
    ]
    assert preview.summary.total_rows == 4
    assert preview.summary.accepted == 2
    assert preview.summary.skipped == 1
    assert preview.summary.errored == 1
    assert preview.stage_token is not None

    issues_by_row = {bundle.row_number: bundle for bundle in preview.row_issues}
    assert 2 in issues_by_row
    assert issues_by_row[2].state == ImportPreviewState.UPDATE
    assert 4 in issues_by_row
    assert issues_by_row[4].state == ImportPreviewState.ERROR
    assert any(
        "does not exist" in detail.message
        for detail in issues_by_row[4].issues
    )

    # Error and skip rows are excluded from the staged set.
    staged = ingestion_service._scenario_stage[preview.stage_token]  # type: ignore[attr-defined]
    assert len(staged.rows) == 2
    error_row = preview.rows[2]
    assert any("does not exist" in msg for msg in error_row.issues)
|
||||
|
||||
def test_preview_scenarios_aggregates_parser_errors(
    ingestion_service: ImportIngestionService,
    unit_of_work_factory: Callable[[], UnitOfWork],
) -> None:
    """A row that fails parsing produces no preview rows, no stage token, and a row-level issue."""
    with unit_of_work_factory() as uow:
        assert uow.projects is not None
        uow.projects.create(
            Project(
                name="Existing Project",
                location="Chile",
                operation_type=MiningOperationType.OPEN_PIT,
            )
        )

    upload = BytesIO(
        (
            "project_name,name,status\n"
            "Existing Project,Broken Scenario,UNKNOWN_STATUS\n"
        ).encode("utf-8")
    )

    preview = ingestion_service.preview_scenarios(upload, "invalid.csv")

    assert preview.rows == []
    assert preview.summary.total_rows == 1
    assert preview.summary.errored == 1
    assert preview.stage_token is None
    assert len(preview.parser_errors) == 1

    bundles = {bundle.row_number: bundle for bundle in preview.row_issues}
    assert 2 in bundles
    failing = bundles[2]
    assert failing.state == ImportPreviewState.ERROR
    # The bad status value is attributed to its column, with a non-empty message.
    assert any(detail.field == "status" for detail in failing.issues)
    assert all(detail.message for detail in failing.issues)
|
||||
|
||||
def test_consume_staged_projects_removes_token(
    ingestion_service: ImportIngestionService,
    unit_of_work_factory: Callable[[], UnitOfWork],
) -> None:
    """Consuming a staged-projects token returns the view once; afterwards the token is gone."""
    with unit_of_work_factory() as uow:
        assert uow.projects is not None

    upload = BytesIO(
        (
            "name,location,operation_type\n"
            "Project X,Peru,open pit\n"
        ).encode("utf-8")
    )

    preview = ingestion_service.preview_projects(upload, "projects.csv")
    assert preview.stage_token is not None
    token = preview.stage_token

    before = ingestion_service.get_staged_projects(token)
    assert isinstance(before, StagedImportView)
    consumed = ingestion_service.consume_staged_projects(token)
    assert consumed == before
    # Consumption is one-shot: both lookup and repeat consume now miss.
    assert ingestion_service.get_staged_projects(token) is None
    assert ingestion_service.consume_staged_projects(token) is None
|
||||
|
||||
def test_clear_staged_scenarios_drops_entry(
    ingestion_service: ImportIngestionService,
    unit_of_work_factory: Callable[[], UnitOfWork],
) -> None:
    """clear_staged_scenarios returns True on first removal and False once the token is gone."""
    with unit_of_work_factory() as uow:
        assert uow.projects is not None
        uow.projects.create(
            Project(
                name="Project Y",
                location="Chile",
                operation_type=MiningOperationType.OPEN_PIT,
            )
        )

    upload = BytesIO(
        (
            "project_name,name,status\n"
            "Project Y,Scenario 1,Active\n"
        ).encode("utf-8")
    )

    preview = ingestion_service.preview_scenarios(upload, "scenarios.csv")
    assert preview.stage_token is not None
    token = preview.stage_token

    assert ingestion_service.get_staged_scenarios(token) is not None
    assert ingestion_service.clear_staged_scenarios(token) is True
    assert ingestion_service.get_staged_scenarios(token) is None
    assert ingestion_service.clear_staged_scenarios(token) is False
78
tests/test_import_parsing.py
Normal file
78
tests/test_import_parsing.py
Normal file
@@ -0,0 +1,78 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from io import BytesIO
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from services.importers import ImportResult, load_project_imports, load_scenario_imports
|
||||
from schemas.imports import ProjectImportRow, ScenarioImportRow
|
||||
|
||||
|
||||
def test_load_project_imports_from_csv() -> None:
    """CSV rows become validated ProjectImportRow entries with spreadsheet row numbers."""
    upload = BytesIO(
        (
            "name,location,operation_type,description\n"
            "Project A,Chile,open pit,First project\n"
            "Project B,,underground,Second project\n"
        ).encode("utf-8")
    )

    result = load_project_imports(upload, "projects.csv")

    assert isinstance(result, ImportResult)
    assert len(result.rows) == 2
    assert not result.errors

    first, second = result.rows
    assert first.row_number == 2  # header occupies row 1
    assert isinstance(first.data, ProjectImportRow)
    assert first.data.name == "Project A"
    assert first.data.operation_type.value == "open_pit"
    assert second.row_number == 3
    assert isinstance(second.data, ProjectImportRow)
    assert second.data.location is None  # blank CSV cell maps to None
|
||||
|
||||
def test_load_scenario_imports_from_excel() -> None:
    """XLSX input parses with status/currency normalization and percent-string rates."""
    frame = pd.DataFrame(
        [
            {
                "project_name": "Project A",
                "name": "Scenario 1",
                "status": "Active",
                "start_date": "2025-01-01",
                "end_date": "2025-12-31",
                "discount_rate": "7.5%",
                "currency": "usd",
                "primary_resource": "Electricity",
            }
        ]
    )
    workbook = BytesIO()
    frame.to_excel(workbook, index=False)
    workbook.seek(0)

    result = load_scenario_imports(workbook, "scenarios.xlsx")

    assert len(result.rows) == 1
    assert not result.errors
    row = result.rows[0]
    assert row.row_number == 2
    assert isinstance(row.data, ScenarioImportRow)
    assert row.data.status.value == "active"  # "Active" normalized to enum value
    assert row.data.currency == "USD"  # "usd" upper-cased
    assert row.data.discount_rate == pytest.approx(7.5)  # "7.5%" parsed to float
|
||||
|
||||
def test_import_errors_include_row_numbers() -> None:
    """A missing required field is reported with the offending row number and field name."""
    upload = BytesIO("name,operation_type\n,open pit\n".encode("utf-8"))

    result = load_project_imports(upload, "projects.csv")

    assert not result.rows
    assert len(result.errors) == 1
    failure = result.errors[0]
    assert failure.row_number == 2
    assert failure.field == "name"
    assert "required" in failure.message
Reference in New Issue
Block a user