feat: implement import functionality for projects and scenarios with CSV/XLSX support, including validation and error handling

2025-11-10 09:10:47 +01:00
parent 7058eb4172
commit 3bc124c11f
7 changed files with 1084 additions and 2 deletions

View File

@@ -30,3 +30,5 @@
- Implemented environment-driven admin bootstrap settings, wired the `bootstrap_admin` helper into FastAPI startup, added pytest coverage for creation/idempotency/reset logic, and documented operational guidance in the RBAC plan and security concept.
- Retired the legacy authentication RBAC implementation plan document after migrating its guidance into live documentation and synchronized the contributor instructions to reflect the removal.
- Completed the Authentication & RBAC checklist by shipping the new models, migrations, repositories, guard dependencies, and integration tests.
- Documented the project/scenario import/export field mapping and file format guidelines in `calminer-docs/requirements/FR-008.md`, and introduced `schemas/imports.py` with Pydantic models that normalise incoming CSV/Excel rows for projects and scenarios.
- Added `services/importers.py` to load CSV/XLSX files into the new import schemas, pulled in `openpyxl` for Excel support, and covered the parsing behaviour with `tests/test_import_parsing.py`.
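
For orientation, the loader described above can be driven end to end roughly like the following sketch (illustrative only; the column names follow the project field mapping documented in FR-008):

from io import BytesIO

from services.importers import load_project_imports

csv_content = (
    "name,location,operation_type\n"
    "Project A,Chile,open pit\n"
)
result = load_project_imports(BytesIO(csv_content.encode("utf-8")), "projects.csv")

# Each parsed row keeps its spreadsheet row number (the header counts as row 1)
# next to the validated ProjectImportRow instance.
for parsed in result.rows:
    print(parsed.row_number, parsed.data.name, parsed.data.operation_type)

# Validation problems are collected per row instead of aborting the whole file.
for error in result.errors:
    print(error.row_number, error.field, error.message)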

View File

@@ -11,4 +11,5 @@ numpy
passlib
argon2-cffi
python-jose
python-multipart
openpyxl
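
The new openpyxl dependency backs the Excel branch of the loader in services/importers.py below; the call it enables looks roughly like this sketch (the workbook name is illustrative):

import pandas as pd

with open("scenarios.xlsx", "rb") as xlsx_stream:
    # Mirrors _load_dataframe below: cells come back as strings and are normalised by the schemas.
    df = pd.read_excel(xlsx_stream, dtype=str, engine="openpyxl")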

schemas/imports.py (new file, 172 lines)
View File

@@ -0,0 +1,172 @@
from __future__ import annotations
from datetime import date, datetime
from typing import Any, Mapping
from pydantic import BaseModel, ConfigDict, field_validator, model_validator
from models import MiningOperationType, ResourceType, ScenarioStatus
def _normalise_string(value: Any) -> str:
if value is None:
return ""
if isinstance(value, str):
return value.strip()
return str(value).strip()
def _strip_or_none(value: Any | None) -> str | None:
if value is None:
return None
text = _normalise_string(value)
return text or None
def _coerce_enum(value: Any, enum_cls: Any, aliases: Mapping[str, Any]) -> Any:
if value is None:
return value
if isinstance(value, enum_cls):
return value
text = _normalise_string(value).lower()
if not text:
return None
if text in aliases:
return aliases[text]
try:
return enum_cls(text)
except ValueError as exc: # pragma: no cover - surfaced by Pydantic
raise ValueError(
f"Invalid value '{value}' for {enum_cls.__name__}") from exc
OPERATION_TYPE_ALIASES: dict[str, MiningOperationType] = {
"open pit": MiningOperationType.OPEN_PIT,
"openpit": MiningOperationType.OPEN_PIT,
"underground": MiningOperationType.UNDERGROUND,
"in-situ leach": MiningOperationType.IN_SITU_LEACH,
"in situ": MiningOperationType.IN_SITU_LEACH,
"placer": MiningOperationType.PLACER,
"quarry": MiningOperationType.QUARRY,
"mountaintop removal": MiningOperationType.MOUNTAINTOP_REMOVAL,
"other": MiningOperationType.OTHER,
}
SCENARIO_STATUS_ALIASES: dict[str, ScenarioStatus] = {
"draft": ScenarioStatus.DRAFT,
"active": ScenarioStatus.ACTIVE,
"archived": ScenarioStatus.ARCHIVED,
}
RESOURCE_TYPE_ALIASES: dict[str, ResourceType] = {
key.replace("_", " ").lower(): value for key, value in ResourceType.__members__.items()
}
RESOURCE_TYPE_ALIASES.update(
{value.value.replace("_", " ").lower(): value for value in ResourceType}
)
class ProjectImportRow(BaseModel):
name: str
location: str | None = None
operation_type: MiningOperationType
description: str | None = None
created_at: datetime | None = None
updated_at: datetime | None = None
model_config = ConfigDict(extra="forbid")
@field_validator("name", mode="before")
@classmethod
def validate_name(cls, value: Any) -> str:
text = _normalise_string(value)
if not text:
raise ValueError("Project name is required")
return text
@field_validator("location", "description", mode="before")
@classmethod
def optional_text(cls, value: Any | None) -> str | None:
return _strip_or_none(value)
@field_validator("operation_type", mode="before")
@classmethod
def map_operation_type(cls, value: Any) -> MiningOperationType | None:
return _coerce_enum(value, MiningOperationType, OPERATION_TYPE_ALIASES)
class ScenarioImportRow(BaseModel):
project_name: str
name: str
status: ScenarioStatus = ScenarioStatus.DRAFT
start_date: date | None = None
end_date: date | None = None
discount_rate: float | None = None
currency: str | None = None
primary_resource: ResourceType | None = None
description: str | None = None
created_at: datetime | None = None
updated_at: datetime | None = None
model_config = ConfigDict(extra="forbid")
@field_validator("project_name", "name", mode="before")
@classmethod
def validate_required_text(cls, value: Any, info) -> str:
text = _normalise_string(value)
if not text:
raise ValueError(
f"{info.field_name.replace('_', ' ').title()} is required")
return text
@field_validator("status", mode="before")
@classmethod
def map_status(cls, value: Any) -> ScenarioStatus | None:
return _coerce_enum(value, ScenarioStatus, SCENARIO_STATUS_ALIASES)
@field_validator("primary_resource", mode="before")
@classmethod
def map_resource(cls, value: Any) -> ResourceType | None:
return _coerce_enum(value, ResourceType, RESOURCE_TYPE_ALIASES)
@field_validator("description", mode="before")
@classmethod
def optional_description(cls, value: Any | None) -> str | None:
return _strip_or_none(value)
@field_validator("currency", mode="before")
@classmethod
def normalise_currency(cls, value: Any | None) -> str | None:
if value is None:
return None
text = _normalise_string(value).upper()
if not text:
return None
if len(text) != 3:
raise ValueError("Currency code must be a 3-letter ISO value")
return text
@field_validator("discount_rate", mode="before")
@classmethod
def coerce_discount_rate(cls, value: Any | None) -> float | None:
if value is None:
return None
if isinstance(value, (int, float)):
return float(value)
text = _normalise_string(value)
if not text:
return None
if text.endswith("%"):
text = text[:-1]
try:
return float(text)
except ValueError as exc:
raise ValueError("Discount rate must be numeric") from exc
@model_validator(mode="after")
def validate_dates(self) -> "ScenarioImportRow":
if self.start_date and self.end_date and self.start_date > self.end_date:
raise ValueError("End date must be on or after start date")
return self
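
A minimal sketch of the normalisation these models perform (values are made up purely to exercise the alias, currency, and percentage handling defined above):

from schemas.imports import ProjectImportRow, ScenarioImportRow

project = ProjectImportRow(name="  Project A  ", operation_type="open pit")
# "open pit" resolves through OPERATION_TYPE_ALIASES; surrounding whitespace is stripped.
assert project.name == "Project A"
assert project.operation_type.value == "open_pit"

scenario = ScenarioImportRow(
    project_name="Project A",
    name="Base Case",
    status="Active",        # mapped to ScenarioStatus via SCENARIO_STATUS_ALIASES
    currency="usd",         # upper-cased and required to be a 3-letter code
    discount_rate="7.5%",   # trailing '%' stripped, then coerced to float
)
assert scenario.currency == "USD"
assert scenario.discount_rate == 7.5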

services/importers.py (new file, 564 lines)
View File

@@ -0,0 +1,564 @@
from __future__ import annotations
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Any, BinaryIO, Callable, Generic, Iterable, Mapping, TypeVar, cast
from uuid import uuid4
from types import MappingProxyType
import pandas as pd
from pandas import DataFrame
from pydantic import BaseModel, ValidationError
from models import Project, Scenario
from schemas.imports import ProjectImportRow, ScenarioImportRow
from services.unit_of_work import UnitOfWork
TImportRow = TypeVar("TImportRow", bound=BaseModel)
PROJECT_COLUMNS: tuple[str, ...] = (
"name",
"location",
"operation_type",
"description",
"created_at",
"updated_at",
)
SCENARIO_COLUMNS: tuple[str, ...] = (
"project_name",
"name",
"status",
"start_date",
"end_date",
"discount_rate",
"currency",
"primary_resource",
"description",
"created_at",
"updated_at",
)
@dataclass(slots=True)
class ImportRowError:
row_number: int
field: str | None
message: str
@dataclass(slots=True)
class ParsedImportRow(Generic[TImportRow]):
row_number: int
data: TImportRow
@dataclass(slots=True)
class ImportResult(Generic[TImportRow]):
rows: list[ParsedImportRow[TImportRow]]
errors: list[ImportRowError]
class UnsupportedImportFormat(ValueError):
pass
class ImportPreviewState(str, Enum):
NEW = "new"
UPDATE = "update"
SKIP = "skip"
ERROR = "error"
@dataclass(slots=True)
class ImportPreviewRow(Generic[TImportRow]):
row_number: int
data: TImportRow
state: ImportPreviewState
issues: list[str]
context: dict[str, Any] | None = None
@dataclass(slots=True)
class ImportPreviewSummary:
total_rows: int
accepted: int
skipped: int
errored: int
@dataclass(slots=True)
class ImportPreview(Generic[TImportRow]):
rows: list[ImportPreviewRow[TImportRow]]
summary: ImportPreviewSummary
row_issues: list["ImportPreviewRowIssues"]
parser_errors: list[ImportRowError]
stage_token: str | None
@dataclass(slots=True)
class StagedRow(Generic[TImportRow]):
parsed: ParsedImportRow[TImportRow]
context: dict[str, Any]
@dataclass(slots=True)
class ImportPreviewRowIssue:
message: str
field: str | None = None
@dataclass(slots=True)
class ImportPreviewRowIssues:
row_number: int
state: ImportPreviewState | None
issues: list[ImportPreviewRowIssue]
@dataclass(slots=True)
class StagedImport(Generic[TImportRow]):
token: str
rows: list[StagedRow[TImportRow]]
@dataclass(slots=True, frozen=True)
class StagedRowView(Generic[TImportRow]):
row_number: int
data: TImportRow
context: Mapping[str, Any]
@dataclass(slots=True, frozen=True)
class StagedImportView(Generic[TImportRow]):
token: str
rows: tuple[StagedRowView[TImportRow], ...]
UnitOfWorkFactory = Callable[[], UnitOfWork]
class ImportIngestionService:
"""Coordinates parsing, validation, and preview staging for imports."""
def __init__(self, uow_factory: UnitOfWorkFactory) -> None:
self._uow_factory = uow_factory
self._project_stage: dict[str, StagedImport[ProjectImportRow]] = {}
self._scenario_stage: dict[str, StagedImport[ScenarioImportRow]] = {}
def preview_projects(
self,
stream: BinaryIO,
filename: str,
) -> ImportPreview[ProjectImportRow]:
result = load_project_imports(stream, filename)
parser_errors = result.errors
preview_rows: list[ImportPreviewRow[ProjectImportRow]] = []
staged_rows: list[StagedRow[ProjectImportRow]] = []
accepted = skipped = errored = 0
seen_names: set[str] = set()
existing_by_name: dict[str, Project] = {}
if result.rows:
with self._uow_factory() as uow:
if not uow.projects:
raise RuntimeError("Project repository is unavailable")
existing_by_name = dict(
uow.projects.find_by_names(
parsed.data.name for parsed in result.rows
)
)
for parsed in result.rows:
name_key = _normalise_key(parsed.data.name)
issues: list[str] = []
context: dict[str, Any] | None = None
state = ImportPreviewState.NEW
if name_key in seen_names:
state = ImportPreviewState.SKIP
issues.append(
"Duplicate project name within upload; row skipped.")
else:
seen_names.add(name_key)
existing = existing_by_name.get(name_key)
if existing:
state = ImportPreviewState.UPDATE
context = {
"mode": "update",
"project_id": existing.id,
}
issues.append("Existing project will be updated.")
else:
context = {"mode": "create"}
preview_rows.append(
ImportPreviewRow(
row_number=parsed.row_number,
data=parsed.data,
state=state,
issues=issues,
context=context,
)
)
if state in {ImportPreviewState.NEW, ImportPreviewState.UPDATE}:
accepted += 1
staged_rows.append(
StagedRow(parsed=parsed, context=context or {
"mode": "create"})
)
elif state == ImportPreviewState.SKIP:
skipped += 1
else:
errored += 1
parser_error_rows = {error.row_number for error in parser_errors}
errored += len(parser_error_rows)
total_rows = len(preview_rows) + len(parser_error_rows)
summary = ImportPreviewSummary(
total_rows=total_rows,
accepted=accepted,
skipped=skipped,
errored=errored,
)
row_issues = _compile_row_issues(preview_rows, parser_errors)
stage_token: str | None = None
if staged_rows:
stage_token = self._store_project_stage(staged_rows)
return ImportPreview(
rows=preview_rows,
summary=summary,
row_issues=row_issues,
parser_errors=parser_errors,
stage_token=stage_token,
)
def preview_scenarios(
self,
stream: BinaryIO,
filename: str,
) -> ImportPreview[ScenarioImportRow]:
result = load_scenario_imports(stream, filename)
parser_errors = result.errors
preview_rows: list[ImportPreviewRow[ScenarioImportRow]] = []
staged_rows: list[StagedRow[ScenarioImportRow]] = []
accepted = skipped = errored = 0
seen_pairs: set[tuple[str, str]] = set()
existing_projects: dict[str, Project] = {}
existing_scenarios: dict[tuple[int, str], Scenario] = {}
if result.rows:
with self._uow_factory() as uow:
if not uow.projects or not uow.scenarios:
raise RuntimeError("Repositories are unavailable")
existing_projects = dict(
uow.projects.find_by_names(
parsed.data.project_name for parsed in result.rows
)
)
names_by_project: dict[int, set[str]] = {}
for parsed in result.rows:
project = existing_projects.get(
_normalise_key(parsed.data.project_name)
)
if not project:
continue
names_by_project.setdefault(project.id, set()).add(
_normalise_key(parsed.data.name)
)
for project_id, names in names_by_project.items():
matches = uow.scenarios.find_by_project_and_names(
project_id, names)
for name_key, scenario in matches.items():
existing_scenarios[(project_id, name_key)] = scenario
for parsed in result.rows:
project_key = _normalise_key(parsed.data.project_name)
scenario_key = _normalise_key(parsed.data.name)
issues: list[str] = []
context: dict[str, Any] | None = None
state = ImportPreviewState.NEW
if (project_key, scenario_key) in seen_pairs:
state = ImportPreviewState.SKIP
issues.append(
"Duplicate scenario for project within upload; row skipped."
)
else:
seen_pairs.add((project_key, scenario_key))
project = existing_projects.get(project_key)
if not project:
state = ImportPreviewState.ERROR
issues.append(
f"Project '{parsed.data.project_name}' does not exist."
)
else:
context = {"mode": "create", "project_id": project.id}
existing = existing_scenarios.get(
(project.id, scenario_key))
if existing:
state = ImportPreviewState.UPDATE
context = {
"mode": "update",
"project_id": project.id,
"scenario_id": existing.id,
}
issues.append("Existing scenario will be updated.")
preview_rows.append(
ImportPreviewRow(
row_number=parsed.row_number,
data=parsed.data,
state=state,
issues=issues,
context=context,
)
)
if state in {ImportPreviewState.NEW, ImportPreviewState.UPDATE}:
accepted += 1
staged_rows.append(
StagedRow(parsed=parsed, context=context or {
"mode": "create"})
)
elif state == ImportPreviewState.SKIP:
skipped += 1
else:
errored += 1
parser_error_rows = {error.row_number for error in parser_errors}
errored += len(parser_error_rows)
total_rows = len(preview_rows) + len(parser_error_rows)
summary = ImportPreviewSummary(
total_rows=total_rows,
accepted=accepted,
skipped=skipped,
errored=errored,
)
row_issues = _compile_row_issues(preview_rows, parser_errors)
stage_token: str | None = None
if staged_rows:
stage_token = self._store_scenario_stage(staged_rows)
return ImportPreview(
rows=preview_rows,
summary=summary,
row_issues=row_issues,
parser_errors=parser_errors,
stage_token=stage_token,
)
def get_staged_projects(
self, token: str
) -> StagedImportView[ProjectImportRow] | None:
staged = self._project_stage.get(token)
if not staged:
return None
return _build_staged_view(staged)
def get_staged_scenarios(
self, token: str
) -> StagedImportView[ScenarioImportRow] | None:
staged = self._scenario_stage.get(token)
if not staged:
return None
return _build_staged_view(staged)
def consume_staged_projects(
self, token: str
) -> StagedImportView[ProjectImportRow] | None:
staged = self._project_stage.pop(token, None)
if not staged:
return None
return _build_staged_view(staged)
def consume_staged_scenarios(
self, token: str
) -> StagedImportView[ScenarioImportRow] | None:
staged = self._scenario_stage.pop(token, None)
if not staged:
return None
return _build_staged_view(staged)
def clear_staged_projects(self, token: str) -> bool:
return self._project_stage.pop(token, None) is not None
def clear_staged_scenarios(self, token: str) -> bool:
return self._scenario_stage.pop(token, None) is not None
def _store_project_stage(
self, rows: list[StagedRow[ProjectImportRow]]
) -> str:
token = str(uuid4())
self._project_stage[token] = StagedImport(token=token, rows=rows)
return token
def _store_scenario_stage(
self, rows: list[StagedRow[ScenarioImportRow]]
) -> str:
token = str(uuid4())
self._scenario_stage[token] = StagedImport(token=token, rows=rows)
return token
def load_project_imports(stream: BinaryIO, filename: str) -> ImportResult[ProjectImportRow]:
df = _load_dataframe(stream, filename)
return _parse_dataframe(df, ProjectImportRow, PROJECT_COLUMNS)
def load_scenario_imports(stream: BinaryIO, filename: str) -> ImportResult[ScenarioImportRow]:
df = _load_dataframe(stream, filename)
return _parse_dataframe(df, ScenarioImportRow, SCENARIO_COLUMNS)
def _load_dataframe(stream: BinaryIO, filename: str) -> DataFrame:
stream.seek(0)
suffix = Path(filename).suffix.lower()
if suffix == ".csv":
df = pd.read_csv(stream, dtype=str,
keep_default_na=False, encoding="utf-8")
elif suffix in {".xls", ".xlsx"}:
df = pd.read_excel(stream, dtype=str, engine="openpyxl")
else:
raise UnsupportedImportFormat(
f"Unsupported file type: {suffix or 'unknown'}")
df.columns = [str(col).strip().lower() for col in df.columns]
return df
def _parse_dataframe(
df: DataFrame,
model: type[TImportRow],
expected_columns: Iterable[str],
) -> ImportResult[TImportRow]:
rows: list[ParsedImportRow[TImportRow]] = []
errors: list[ImportRowError] = []
for index, raw in enumerate(df.to_dict(orient="records"), start=2):
payload = _prepare_payload(
cast(dict[str, object], raw), expected_columns)
try:
rows.append(
ParsedImportRow(row_number=index, data=model(**payload))
)
except ValidationError as exc: # pragma: no cover - exercised via tests
for detail in exc.errors():
loc = ".".join(str(part)
for part in detail.get("loc", [])) or None
errors.append(
ImportRowError(
row_number=index,
field=loc,
message=detail.get("msg", "Invalid value"),
)
)
return ImportResult(rows=rows, errors=errors)
def _prepare_payload(
raw: dict[str, object], expected_columns: Iterable[str]
) -> dict[str, object | None]:
payload: dict[str, object | None] = {}
for column in expected_columns:
if column not in raw:
continue
value = raw.get(column)
if isinstance(value, str):
value = value.strip()
if value == "":
value = None
if value is not None and pd.isna(cast(Any, value)):
value = None
payload[column] = value
return payload
def _normalise_key(value: str) -> str:
return value.strip().lower()
def _build_staged_view(
staged: StagedImport[TImportRow],
) -> StagedImportView[TImportRow]:
rows = tuple(
StagedRowView(
row_number=row.parsed.row_number,
data=cast(TImportRow, _deep_copy_model(row.parsed.data)),
context=MappingProxyType(dict(row.context)),
)
for row in staged.rows
)
return StagedImportView(token=staged.token, rows=rows)
def _deep_copy_model(model: BaseModel) -> BaseModel:
copy_method = getattr(model, "model_copy", None)
if callable(copy_method): # pydantic v2
return cast(BaseModel, copy_method(deep=True))
return model.copy(deep=True) # type: ignore[attr-defined]
def _compile_row_issues(
preview_rows: Iterable[ImportPreviewRow[Any]],
parser_errors: Iterable[ImportRowError],
) -> list[ImportPreviewRowIssues]:
issue_map: dict[int, ImportPreviewRowIssues] = {}
def ensure_bundle(
row_number: int,
state: ImportPreviewState | None,
) -> ImportPreviewRowIssues:
bundle = issue_map.get(row_number)
if bundle is None:
bundle = ImportPreviewRowIssues(
row_number=row_number,
state=state,
issues=[],
)
issue_map[row_number] = bundle
else:
if _state_priority(state) > _state_priority(bundle.state):
bundle.state = state
return bundle
for row in preview_rows:
if not row.issues:
continue
bundle = ensure_bundle(row.row_number, row.state)
for message in row.issues:
bundle.issues.append(ImportPreviewRowIssue(message=message))
for error in parser_errors:
bundle = ensure_bundle(error.row_number, ImportPreviewState.ERROR)
bundle.issues.append(
ImportPreviewRowIssue(message=error.message, field=error.field)
)
return sorted(issue_map.values(), key=lambda item: item.row_number)
def _state_priority(state: ImportPreviewState | None) -> int:
if state is None:
return -1
if state == ImportPreviewState.ERROR:
return 3
if state == ImportPreviewState.SKIP:
return 2
if state == ImportPreviewState.UPDATE:
return 1
return 0
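
How these pieces fit together, as a rough sketch (the unit-of-work factory is whatever the application already wires up; names here are illustrative):

from io import BytesIO

from services.importers import ImportIngestionService

service = ImportIngestionService(unit_of_work_factory)  # factory provided by the application

stream = BytesIO(b"name,location,operation_type\nProject A,Chile,open pit\n")
preview = service.preview_projects(stream, "projects.csv")

print(preview.summary.accepted, preview.summary.skipped, preview.summary.errored)
for bundle in preview.row_issues:
    print(bundle.row_number, bundle.state, [issue.message for issue in bundle.issues])

# Accepted rows are staged under an opaque token until the caller commits the import.
if preview.stage_token is not None:
    staged = service.consume_staged_projects(preview.stage_token)  # one-shot: the token is removed
    # staged.rows expose read-only (row_number, data, context) views for the persistence step.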

View File

@@ -2,7 +2,7 @@ from __future__ import annotations
from collections.abc import Iterable
from datetime import datetime
from typing import Mapping, Sequence
from sqlalchemy import select, func
from sqlalchemy.exc import IntegrityError
@@ -70,6 +70,15 @@ class ProjectRepository:
"Project violates uniqueness constraints") from exc "Project violates uniqueness constraints") from exc
return project return project
def find_by_names(self, names: Iterable[str]) -> Mapping[str, Project]:
normalised = {name.strip().lower()
for name in names if name and name.strip()}
if not normalised:
return {}
stmt = select(Project).where(func.lower(Project.name).in_(normalised))
records = self.session.execute(stmt).scalars().all()
return {project.name.lower(): project for project in records}
def delete(self, project_id: int) -> None:
project = self.get(project_id)
self.session.delete(project)
@@ -149,6 +158,25 @@ class ScenarioRepository:
raise EntityConflictError("Scenario violates constraints") from exc
return scenario
def find_by_project_and_names(
self,
project_id: int,
names: Iterable[str],
) -> Mapping[str, Scenario]:
normalised = {name.strip().lower()
for name in names if name and name.strip()}
if not normalised:
return {}
stmt = (
select(Scenario)
.where(
Scenario.project_id == project_id,
func.lower(Scenario.name).in_(normalised),
)
)
records = self.session.execute(stmt).scalars().all()
return {scenario.name.lower(): scenario for scenario in records}
def delete(self, scenario_id: int) -> None:
scenario = self.get(scenario_id)
self.session.delete(scenario)
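
A short usage sketch for the new lookup helpers (session and unit-of-work wiring assumed to follow the existing repository pattern used by the importers above):

with unit_of_work_factory() as uow:  # factory as used elsewhere in the services layer
    # The returned mappings are keyed by lower-cased names, so normalise lookups accordingly.
    projects = uow.projects.find_by_names(["Project A", "  Project B  "])
    existing = projects.get("project a")
    if existing is not None:
        scenarios = uow.scenarios.find_by_project_and_names(
            existing.id, ["Base Case", "Expansion"]
        )
        match = scenarios.get("base case")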

View File

@@ -0,0 +1,237 @@
from __future__ import annotations
from io import BytesIO
from typing import Callable
import pandas as pd
import pytest
from models.project import MiningOperationType, Project
from models.scenario import Scenario, ScenarioStatus
from services.importers import (
ImportIngestionService,
ImportPreviewState,
StagedImportView,
)
from services.unit_of_work import UnitOfWork
@pytest.fixture()
def ingestion_service(unit_of_work_factory: Callable[[], UnitOfWork]) -> ImportIngestionService:
return ImportIngestionService(unit_of_work_factory)
def test_preview_projects_flags_updates_and_duplicates(
ingestion_service: ImportIngestionService,
unit_of_work_factory: Callable[[], UnitOfWork],
) -> None:
with unit_of_work_factory() as uow:
assert uow.projects is not None
existing = Project(
name="Project A",
location="Chile",
operation_type=MiningOperationType.OPEN_PIT,
)
uow.projects.create(existing)
csv_content = (
"name,location,operation_type\n"
"Project A,Peru,open pit\n"
"Project B,Canada,underground\n"
"Project B,Canada,underground\n"
)
stream = BytesIO(csv_content.encode("utf-8"))
preview = ingestion_service.preview_projects(stream, "projects.csv")
states = [row.state for row in preview.rows]
assert states == [
ImportPreviewState.UPDATE,
ImportPreviewState.NEW,
ImportPreviewState.SKIP,
]
assert preview.summary.total_rows == 3
assert preview.summary.accepted == 2
assert preview.summary.skipped == 1
assert preview.summary.errored == 0
assert preview.parser_errors == []
assert preview.stage_token is not None
issue_map = {bundle.row_number: bundle for bundle in preview.row_issues}
assert 2 in issue_map and issue_map[2].state == ImportPreviewState.UPDATE
assert {
detail.message for detail in issue_map[2].issues
} == {"Existing project will be updated."}
assert 4 in issue_map and issue_map[4].state == ImportPreviewState.SKIP
assert any(
"Duplicate project name" in detail.message
for detail in issue_map[4].issues
)
staged = ingestion_service._project_stage[preview.stage_token]  # type: ignore[attr-defined]
assert len(staged.rows) == 2
update_context = preview.rows[0].context
assert update_context is not None and update_context.get(
"project_id") is not None
def test_preview_scenarios_validates_projects_and_updates(
ingestion_service: ImportIngestionService,
unit_of_work_factory: Callable[[], UnitOfWork],
) -> None:
with unit_of_work_factory() as uow:
assert uow.projects is not None and uow.scenarios is not None
project = Project(
name="Existing Project",
location="Chile",
operation_type=MiningOperationType.OPEN_PIT,
)
uow.projects.create(project)
scenario = Scenario(
project_id=project.id,
name="Existing Scenario",
status=ScenarioStatus.ACTIVE,
)
uow.scenarios.create(scenario)
df = pd.DataFrame(
[
{
"project_name": "Existing Project",
"name": "Existing Scenario",
"status": "Active",
},
{
"project_name": "Existing Project",
"name": "New Scenario",
"status": "Draft",
},
{
"project_name": "Missing Project",
"name": "Ghost Scenario",
"status": "Draft",
},
{
"project_name": "Existing Project",
"name": "New Scenario",
"status": "Draft",
},
]
)
buffer = BytesIO()
df.to_csv(buffer, index=False)
buffer.seek(0)
preview = ingestion_service.preview_scenarios(buffer, "scenarios.csv")
states = [row.state for row in preview.rows]
assert states == [
ImportPreviewState.UPDATE,
ImportPreviewState.NEW,
ImportPreviewState.ERROR,
ImportPreviewState.SKIP,
]
assert preview.summary.total_rows == 4
assert preview.summary.accepted == 2
assert preview.summary.skipped == 1
assert preview.summary.errored == 1
assert preview.stage_token is not None
issue_map = {bundle.row_number: bundle for bundle in preview.row_issues}
assert 2 in issue_map and issue_map[2].state == ImportPreviewState.UPDATE
assert 4 in issue_map and issue_map[4].state == ImportPreviewState.ERROR
assert any(
"does not exist" in detail.message
for detail in issue_map[4].issues
)
staged = ingestion_service._scenario_stage[preview.stage_token]  # type: ignore[attr-defined]
assert len(staged.rows) == 2
error_row = preview.rows[2]
assert any("does not exist" in msg for msg in error_row.issues)
def test_preview_scenarios_aggregates_parser_errors(
ingestion_service: ImportIngestionService,
unit_of_work_factory: Callable[[], UnitOfWork],
) -> None:
with unit_of_work_factory() as uow:
assert uow.projects is not None
project = Project(
name="Existing Project",
location="Chile",
operation_type=MiningOperationType.OPEN_PIT,
)
uow.projects.create(project)
csv_content = (
"project_name,name,status\n"
"Existing Project,Broken Scenario,UNKNOWN_STATUS\n"
)
stream = BytesIO(csv_content.encode("utf-8"))
preview = ingestion_service.preview_scenarios(stream, "invalid.csv")
assert preview.rows == []
assert preview.summary.total_rows == 1
assert preview.summary.errored == 1
assert preview.stage_token is None
assert len(preview.parser_errors) == 1
issue_map = {bundle.row_number: bundle for bundle in preview.row_issues}
assert 2 in issue_map
bundle = issue_map[2]
assert bundle.state == ImportPreviewState.ERROR
assert any(detail.field == "status" for detail in bundle.issues)
assert all(detail.message for detail in bundle.issues)
def test_consume_staged_projects_removes_token(
ingestion_service: ImportIngestionService,
unit_of_work_factory: Callable[[], UnitOfWork],
) -> None:
with unit_of_work_factory() as uow:
assert uow.projects is not None
csv_content = (
"name,location,operation_type\n"
"Project X,Peru,open pit\n"
)
stream = BytesIO(csv_content.encode("utf-8"))
preview = ingestion_service.preview_projects(stream, "projects.csv")
assert preview.stage_token is not None
token = preview.stage_token
initial_view = ingestion_service.get_staged_projects(token)
assert isinstance(initial_view, StagedImportView)
consumed = ingestion_service.consume_staged_projects(token)
assert consumed == initial_view
assert ingestion_service.get_staged_projects(token) is None
assert ingestion_service.consume_staged_projects(token) is None
def test_clear_staged_scenarios_drops_entry(
ingestion_service: ImportIngestionService,
unit_of_work_factory: Callable[[], UnitOfWork],
) -> None:
with unit_of_work_factory() as uow:
assert uow.projects is not None
project = Project(
name="Project Y",
location="Chile",
operation_type=MiningOperationType.OPEN_PIT,
)
uow.projects.create(project)
csv_content = (
"project_name,name,status\n"
"Project Y,Scenario 1,Active\n"
)
stream = BytesIO(csv_content.encode("utf-8"))
preview = ingestion_service.preview_scenarios(stream, "scenarios.csv")
assert preview.stage_token is not None
token = preview.stage_token
assert ingestion_service.get_staged_scenarios(token) is not None
assert ingestion_service.clear_staged_scenarios(token) is True
assert ingestion_service.get_staged_scenarios(token) is None
assert ingestion_service.clear_staged_scenarios(token) is False

View File

@@ -0,0 +1,78 @@
from __future__ import annotations
from io import BytesIO
import pandas as pd
import pytest
from services.importers import ImportResult, load_project_imports, load_scenario_imports
from schemas.imports import ProjectImportRow, ScenarioImportRow
def test_load_project_imports_from_csv() -> None:
csv_content = (
"name,location,operation_type,description\n"
"Project A,Chile,open pit,First project\n"
"Project B,,underground,Second project\n"
)
stream = BytesIO(csv_content.encode("utf-8"))
result = load_project_imports(stream, "projects.csv")
assert isinstance(result, ImportResult)
assert len(result.rows) == 2
assert not result.errors
first = result.rows[0]
assert first.row_number == 2
assert isinstance(first.data, ProjectImportRow)
assert first.data.name == "Project A"
assert first.data.operation_type.value == "open_pit"
second = result.rows[1]
assert second.row_number == 3
assert isinstance(second.data, ProjectImportRow)
assert second.data.location is None
def test_load_scenario_imports_from_excel() -> None:
df = pd.DataFrame(
[
{
"project_name": "Project A",
"name": "Scenario 1",
"status": "Active",
"start_date": "2025-01-01",
"end_date": "2025-12-31",
"discount_rate": "7.5%",
"currency": "usd",
"primary_resource": "Electricity",
}
]
)
buffer = BytesIO()
df.to_excel(buffer, index=False)
buffer.seek(0)
result = load_scenario_imports(buffer, "scenarios.xlsx")
assert len(result.rows) == 1
assert not result.errors
row = result.rows[0]
assert row.row_number == 2
assert isinstance(row.data, ScenarioImportRow)
assert row.data.status.value == "active"
assert row.data.currency == "USD"
assert row.data.discount_rate == pytest.approx(7.5)
def test_import_errors_include_row_numbers() -> None:
csv_content = "name,operation_type\n,open pit\n"
stream = BytesIO(csv_content.encode("utf-8"))
result = load_project_imports(stream, "projects.csv")
assert len(result.rows) == 0
assert len(result.errors) == 1
error = result.errors[0]
assert error.row_number == 2
assert error.field == "name"
assert "required" in error.message