feat: implement import functionality for projects and scenarios with CSV/XLSX support, including validation and error handling
This commit is contained in:
237
tests/test_import_ingestion.py
Normal file
237
tests/test_import_ingestion.py
Normal file
@@ -0,0 +1,237 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from io import BytesIO
|
||||
from typing import Callable
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from models.project import MiningOperationType, Project
|
||||
from models.scenario import Scenario, ScenarioStatus
|
||||
from services.importers import (
|
||||
ImportIngestionService,
|
||||
ImportPreviewState,
|
||||
StagedImportView,
|
||||
)
|
||||
from services.unit_of_work import UnitOfWork
|
||||
|
||||
|
||||
@pytest.fixture()
def ingestion_service(unit_of_work_factory: Callable[[], UnitOfWork]) -> ImportIngestionService:
    """Build an ImportIngestionService backed by the test unit-of-work factory."""
    return ImportIngestionService(unit_of_work_factory)
||||
|
||||
|
||||
def test_preview_projects_flags_updates_and_duplicates(
    ingestion_service: ImportIngestionService,
    unit_of_work_factory: Callable[[], UnitOfWork],
) -> None:
    """Preview marks an existing name UPDATE, a fresh name NEW, and an in-file duplicate SKIP."""
    # Seed one project so the first CSV row collides with it by name.
    with unit_of_work_factory() as uow:
        assert uow.projects is not None
        seeded = Project(
            name="Project A",
            location="Chile",
            operation_type=MiningOperationType.OPEN_PIT,
        )
        uow.projects.create(seeded)

    payload = (
        "name,location,operation_type\n"
        "Project A,Peru,open pit\n"
        "Project B,Canada,underground\n"
        "Project B,Canada,underground\n"
    )
    upload = BytesIO(payload.encode("utf-8"))

    preview = ingestion_service.preview_projects(upload, "projects.csv")

    assert [row.state for row in preview.rows] == [
        ImportPreviewState.UPDATE,
        ImportPreviewState.NEW,
        ImportPreviewState.SKIP,
    ]
    summary = preview.summary
    assert summary.total_rows == 3
    assert summary.accepted == 2
    assert summary.skipped == 1
    assert summary.errored == 0
    assert preview.parser_errors == []
    assert preview.stage_token is not None

    # Issue bundles are keyed by spreadsheet row number; row 2 is the first data row.
    issues_by_row = {bundle.row_number: bundle for bundle in preview.row_issues}
    assert 2 in issues_by_row
    assert issues_by_row[2].state == ImportPreviewState.UPDATE
    messages = {detail.message for detail in issues_by_row[2].issues}
    assert messages == {"Existing project will be updated."}
    assert 4 in issues_by_row
    assert issues_by_row[4].state == ImportPreviewState.SKIP
    assert any(
        "Duplicate project name" in detail.message
        for detail in issues_by_row[4].issues
    )

    # Only the accepted rows (UPDATE + NEW) end up staged under the token.
    staged = ingestion_service._project_stage[preview.stage_token]  # type: ignore[attr-defined]
    assert len(staged.rows) == 2
    update_context = preview.rows[0].context
    assert update_context is not None
    assert update_context.get("project_id") is not None
|
||||
|
||||
def test_preview_scenarios_validates_projects_and_updates(
    ingestion_service: ImportIngestionService,
    unit_of_work_factory: Callable[[], UnitOfWork],
) -> None:
    """Scenario preview yields UPDATE / NEW / ERROR (unknown project) / SKIP (duplicate) states."""
    # Seed a project plus one scenario so the first row is an update target.
    with unit_of_work_factory() as uow:
        assert uow.projects is not None and uow.scenarios is not None
        project = Project(
            name="Existing Project",
            location="Chile",
            operation_type=MiningOperationType.OPEN_PIT,
        )
        uow.projects.create(project)
        scenario = Scenario(
            project_id=project.id,
            name="Existing Scenario",
            status=ScenarioStatus.ACTIVE,
        )
        uow.scenarios.create(scenario)

    records = [
        {"project_name": "Existing Project", "name": "Existing Scenario", "status": "Active"},
        {"project_name": "Existing Project", "name": "New Scenario", "status": "Draft"},
        {"project_name": "Missing Project", "name": "Ghost Scenario", "status": "Draft"},
        {"project_name": "Existing Project", "name": "New Scenario", "status": "Draft"},
    ]
    buffer = BytesIO()
    pd.DataFrame(records).to_csv(buffer, index=False)
    buffer.seek(0)

    preview = ingestion_service.preview_scenarios(buffer, "scenarios.csv")

    assert [row.state for row in preview.rows] == [
        ImportPreviewState.UPDATE,
        ImportPreviewState.NEW,
        ImportPreviewState.ERROR,
        ImportPreviewState.SKIP,
    ]
    assert preview.summary.total_rows == 4
    assert preview.summary.accepted == 2
    assert preview.summary.skipped == 1
    assert preview.summary.errored == 1
    assert preview.stage_token is not None

    issues_by_row = {bundle.row_number: bundle for bundle in preview.row_issues}
    assert 2 in issues_by_row
    assert issues_by_row[2].state == ImportPreviewState.UPDATE
    assert 4 in issues_by_row
    assert issues_by_row[4].state == ImportPreviewState.ERROR
    assert any(
        "does not exist" in detail.message
        for detail in issues_by_row[4].issues
    )

    # Error and skip rows are excluded from the staged set.
    staged = ingestion_service._scenario_stage[preview.stage_token]  # type: ignore[attr-defined]
    assert len(staged.rows) == 2
    error_row = preview.rows[2]
    assert any("does not exist" in msg for msg in error_row.issues)
|
||||
|
||||
def test_preview_scenarios_aggregates_parser_errors(
    ingestion_service: ImportIngestionService,
    unit_of_work_factory: Callable[[], UnitOfWork],
) -> None:
    """A row that fails parsing produces no preview rows, no stage token, and a row-level issue."""
    with unit_of_work_factory() as uow:
        assert uow.projects is not None
        uow.projects.create(
            Project(
                name="Existing Project",
                location="Chile",
                operation_type=MiningOperationType.OPEN_PIT,
            )
        )

    upload = BytesIO(
        (
            "project_name,name,status\n"
            "Existing Project,Broken Scenario,UNKNOWN_STATUS\n"
        ).encode("utf-8")
    )

    preview = ingestion_service.preview_scenarios(upload, "invalid.csv")

    assert preview.rows == []
    assert preview.summary.total_rows == 1
    assert preview.summary.errored == 1
    assert preview.stage_token is None
    assert len(preview.parser_errors) == 1

    bundles = {bundle.row_number: bundle for bundle in preview.row_issues}
    assert 2 in bundles
    failing = bundles[2]
    assert failing.state == ImportPreviewState.ERROR
    # The bad status value is attributed to its column, with a non-empty message.
    assert any(detail.field == "status" for detail in failing.issues)
    assert all(detail.message for detail in failing.issues)
|
||||
|
||||
def test_consume_staged_projects_removes_token(
    ingestion_service: ImportIngestionService,
    unit_of_work_factory: Callable[[], UnitOfWork],
) -> None:
    """Consuming a staged-projects token returns the view once; afterwards the token is gone."""
    with unit_of_work_factory() as uow:
        assert uow.projects is not None

    upload = BytesIO(
        (
            "name,location,operation_type\n"
            "Project X,Peru,open pit\n"
        ).encode("utf-8")
    )

    preview = ingestion_service.preview_projects(upload, "projects.csv")
    assert preview.stage_token is not None
    token = preview.stage_token

    before = ingestion_service.get_staged_projects(token)
    assert isinstance(before, StagedImportView)
    consumed = ingestion_service.consume_staged_projects(token)
    assert consumed == before
    # Consumption is one-shot: both lookup and repeat consume now miss.
    assert ingestion_service.get_staged_projects(token) is None
    assert ingestion_service.consume_staged_projects(token) is None
|
||||
|
||||
def test_clear_staged_scenarios_drops_entry(
    ingestion_service: ImportIngestionService,
    unit_of_work_factory: Callable[[], UnitOfWork],
) -> None:
    """clear_staged_scenarios returns True on first removal and False once the token is gone."""
    with unit_of_work_factory() as uow:
        assert uow.projects is not None
        uow.projects.create(
            Project(
                name="Project Y",
                location="Chile",
                operation_type=MiningOperationType.OPEN_PIT,
            )
        )

    upload = BytesIO(
        (
            "project_name,name,status\n"
            "Project Y,Scenario 1,Active\n"
        ).encode("utf-8")
    )

    preview = ingestion_service.preview_scenarios(upload, "scenarios.csv")
    assert preview.stage_token is not None
    token = preview.stage_token

    assert ingestion_service.get_staged_scenarios(token) is not None
    assert ingestion_service.clear_staged_scenarios(token) is True
    assert ingestion_service.get_staged_scenarios(token) is None
    assert ingestion_service.clear_staged_scenarios(token) is False
78
tests/test_import_parsing.py
Normal file
78
tests/test_import_parsing.py
Normal file
@@ -0,0 +1,78 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from io import BytesIO
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from services.importers import ImportResult, load_project_imports, load_scenario_imports
|
||||
from schemas.imports import ProjectImportRow, ScenarioImportRow
|
||||
|
||||
|
||||
def test_load_project_imports_from_csv() -> None:
    """CSV rows become validated ProjectImportRow entries with spreadsheet row numbers."""
    upload = BytesIO(
        (
            "name,location,operation_type,description\n"
            "Project A,Chile,open pit,First project\n"
            "Project B,,underground,Second project\n"
        ).encode("utf-8")
    )

    result = load_project_imports(upload, "projects.csv")

    assert isinstance(result, ImportResult)
    assert len(result.rows) == 2
    assert not result.errors

    first, second = result.rows
    assert first.row_number == 2  # header occupies row 1
    assert isinstance(first.data, ProjectImportRow)
    assert first.data.name == "Project A"
    assert first.data.operation_type.value == "open_pit"
    assert second.row_number == 3
    assert isinstance(second.data, ProjectImportRow)
    assert second.data.location is None  # blank CSV cell maps to None
|
||||
|
||||
def test_load_scenario_imports_from_excel() -> None:
    """XLSX input parses with status/currency normalization and percent-string rates."""
    frame = pd.DataFrame(
        [
            {
                "project_name": "Project A",
                "name": "Scenario 1",
                "status": "Active",
                "start_date": "2025-01-01",
                "end_date": "2025-12-31",
                "discount_rate": "7.5%",
                "currency": "usd",
                "primary_resource": "Electricity",
            }
        ]
    )
    workbook = BytesIO()
    frame.to_excel(workbook, index=False)
    workbook.seek(0)

    result = load_scenario_imports(workbook, "scenarios.xlsx")

    assert len(result.rows) == 1
    assert not result.errors
    row = result.rows[0]
    assert row.row_number == 2
    assert isinstance(row.data, ScenarioImportRow)
    assert row.data.status.value == "active"  # "Active" normalized to enum value
    assert row.data.currency == "USD"  # "usd" upper-cased
    assert row.data.discount_rate == pytest.approx(7.5)  # "7.5%" parsed to float
|
||||
|
||||
def test_import_errors_include_row_numbers() -> None:
    """A missing required field is reported with the offending row number and field name."""
    upload = BytesIO("name,operation_type\n,open pit\n".encode("utf-8"))

    result = load_project_imports(upload, "projects.csv")

    assert not result.rows
    assert len(result.errors) == 1
    failure = result.errors[0]
    assert failure.row_number == 2
    assert failure.field == "name"
    assert "required" in failure.message
Reference in New Issue
Block a user