# calminer/tests/test_import_ingestion.py

from __future__ import annotations

from io import BytesIO
from typing import Callable

import pandas as pd
import pytest

from models.project import MiningOperationType, Project
from models.scenario import Scenario, ScenarioStatus
from services.importers import (
    ImportIngestionService,
    ImportPreviewState,
    StagedImportView,
)
from services.unit_of_work import UnitOfWork


@pytest.fixture()
def ingestion_service(unit_of_work_factory: Callable[[], UnitOfWork]) -> ImportIngestionService:
    """Provide an ``ImportIngestionService`` built from the unit-of-work factory fixture."""
    service = ImportIngestionService(unit_of_work_factory)
    return service


def test_preview_projects_flags_updates_and_duplicates(
    ingestion_service: ImportIngestionService,
    unit_of_work_factory: Callable[[], UnitOfWork],
) -> None:
    """Check how a project preview classifies update, new and duplicate rows.

    One project ("Project A") is stored up front. The uploaded CSV then
    contains a row for that same name, a row for a new name, and an exact
    repeat of the new row. The preview is expected to report them as
    UPDATE, NEW and SKIP respectively, and to stage only the two accepted
    rows under the returned stage token.
    """
    # Seed a project so the first CSV row collides with an existing name.
    with unit_of_work_factory() as uow:
        assert uow.projects is not None
        existing = Project(
            name="Project A",
            location="Chile",
            operation_type=MiningOperationType.OPEN_PIT,
        )
        uow.projects.create(existing)

    csv_content = (
        "name,location,operation_type\n"
        "Project A,Peru,open pit\n"
        "Project B,Canada,underground\n"
        "Project B,Canada,underground\n"
    )
    stream = BytesIO(csv_content.encode("utf-8"))

    preview = ingestion_service.preview_projects(stream, "projects.csv")

    states = [row.state for row in preview.rows]
    assert states == [
        ImportPreviewState.UPDATE,
        ImportPreviewState.NEW,
        ImportPreviewState.SKIP,
    ]

    # Summary counters: the duplicate is skipped, nothing is an error.
    assert preview.summary.total_rows == 3
    assert preview.summary.accepted == 2
    assert preview.summary.skipped == 1
    assert preview.summary.errored == 0
    assert preview.parser_errors == []
    assert preview.stage_token is not None

    # Issue bundles are keyed by row number; the first data row is keyed 2
    # and the third is keyed 4 (i.e. the header line appears to count as 1).
    issue_map = {bundle.row_number: bundle for bundle in preview.row_issues}
    assert 2 in issue_map
    assert issue_map[2].state == ImportPreviewState.UPDATE
    assert {
        detail.message for detail in issue_map[2].issues
    } == {"Existing project will be updated."}
    assert 4 in issue_map
    assert issue_map[4].state == ImportPreviewState.SKIP
    assert any(
        "Duplicate project name" in detail.message
        for detail in issue_map[4].issues
    )

    # Peek at the private staging store. The ignore comment must share a
    # line with the attribute access, otherwise the type checker does not
    # apply it to this statement.
    staged = ingestion_service._project_stage[  # type: ignore[attr-defined]
        preview.stage_token
    ]
    assert len(staged.rows) == 2

    # The UPDATE row must carry the id of the project it will overwrite.
    update_context = preview.rows[0].context
    assert update_context is not None
    assert update_context.get("project_id") is not None


def test_preview_scenarios_validates_projects_and_updates(
    ingestion_service: ImportIngestionService,
    unit_of_work_factory: Callable[[], UnitOfWork],
) -> None:
    """Check how a scenario preview classifies its four kinds of row.

    A project and one scenario are stored up front. The uploaded CSV holds
    four rows: the existing scenario, a new scenario, a scenario that
    points at a project name that was never created, and a repeat of the
    new scenario. The preview is expected to report them as UPDATE, NEW,
    ERROR and SKIP, and to stage only the two accepted rows.
    """
    # Seed the project and scenario that the first CSV row will match.
    with unit_of_work_factory() as uow:
        assert uow.projects is not None and uow.scenarios is not None
        project = Project(
            name="Existing Project",
            location="Chile",
            operation_type=MiningOperationType.OPEN_PIT,
        )
        uow.projects.create(project)
        scenario = Scenario(
            project_id=project.id,
            name="Existing Scenario",
            status=ScenarioStatus.ACTIVE,
        )
        uow.scenarios.create(scenario)

    # Build the upload through pandas so the CSV is produced by to_csv.
    df = pd.DataFrame(
        [
            {
                "project_name": "Existing Project",
                "name": "Existing Scenario",
                "status": "Active",
            },
            {
                "project_name": "Existing Project",
                "name": "New Scenario",
                "status": "Draft",
            },
            {
                "project_name": "Missing Project",
                "name": "Ghost Scenario",
                "status": "Draft",
            },
            {
                "project_name": "Existing Project",
                "name": "New Scenario",
                "status": "Draft",
            },
        ]
    )
    buffer = BytesIO()
    df.to_csv(buffer, index=False)
    buffer.seek(0)  # rewind so the service reads from the start

    preview = ingestion_service.preview_scenarios(buffer, "scenarios.csv")

    states = [row.state for row in preview.rows]
    assert states == [
        ImportPreviewState.UPDATE,
        ImportPreviewState.NEW,
        ImportPreviewState.ERROR,
        ImportPreviewState.SKIP,
    ]

    assert preview.summary.total_rows == 4
    assert preview.summary.accepted == 2
    assert preview.summary.skipped == 1
    assert preview.summary.errored == 1
    assert preview.stage_token is not None

    # Issue bundles are keyed by row number; the first data row is keyed 2
    # and the third (unknown project) is keyed 4.
    issue_map = {bundle.row_number: bundle for bundle in preview.row_issues}
    assert 2 in issue_map
    assert issue_map[2].state == ImportPreviewState.UPDATE
    assert 4 in issue_map
    assert issue_map[4].state == ImportPreviewState.ERROR
    assert any(
        "does not exist" in detail.message
        for detail in issue_map[4].issues
    )

    # Peek at the private staging store. The ignore comment must share a
    # line with the attribute access, otherwise the type checker does not
    # apply it to this statement.
    staged = ingestion_service._scenario_stage[  # type: ignore[attr-defined]
        preview.stage_token
    ]
    assert len(staged.rows) == 2

    # The same error text is also exposed on the preview row itself.
    error_row = preview.rows[2]
    assert any("does not exist" in msg for msg in error_row.issues)


def test_preview_scenarios_aggregates_parser_errors(
    ingestion_service: ImportIngestionService,
    unit_of_work_factory: Callable[[], UnitOfWork],
) -> None:
    """A scenario row with an unrecognised status yields a parser error and no staging.

    The single data row references an existing project but uses the status
    ``UNKNOWN_STATUS``. The preview is expected to contain no rows, one
    parser error, no stage token, and an ERROR issue bundle for row 2 that
    names the ``status`` field.
    """
    with unit_of_work_factory() as uow:
        assert uow.projects is not None
        uow.projects.create(
            Project(
                name="Existing Project",
                location="Chile",
                operation_type=MiningOperationType.OPEN_PIT,
            )
        )

    upload_text = (
        "project_name,name,status\n"
        "Existing Project,Broken Scenario,UNKNOWN_STATUS\n"
    )
    upload = BytesIO(upload_text.encode("utf-8"))

    preview = ingestion_service.preview_scenarios(upload, "invalid.csv")

    # Nothing parsed successfully, so nothing is previewed or staged.
    assert preview.rows == []
    assert preview.summary.total_rows == 1
    assert preview.summary.errored == 1
    assert preview.stage_token is None
    assert len(preview.parser_errors) == 1

    bundles_by_row = {}
    for entry in preview.row_issues:
        bundles_by_row[entry.row_number] = entry
    assert 2 in bundles_by_row

    failing = bundles_by_row[2]
    assert failing.state == ImportPreviewState.ERROR
    assert any(item.field == "status" for item in failing.issues)
    # Every reported issue must carry a non-empty message.
    assert all(item.message for item in failing.issues)


def test_consume_staged_projects_removes_token(
    ingestion_service: ImportIngestionService,
    unit_of_work_factory: Callable[[], UnitOfWork],
) -> None:
    """Consuming a staged project import returns the view once, then the token is gone."""
    with unit_of_work_factory() as uow:
        assert uow.projects is not None

    upload_text = (
        "name,location,operation_type\n"
        "Project X,Peru,open pit\n"
    )
    upload = BytesIO(upload_text.encode("utf-8"))

    preview = ingestion_service.preview_projects(upload, "projects.csv")
    token = preview.stage_token
    assert token is not None

    # Reading the staged view does not remove it...
    staged_before = ingestion_service.get_staged_projects(token)
    assert isinstance(staged_before, StagedImportView)

    # ...but consuming hands back the same view and drops the entry.
    taken = ingestion_service.consume_staged_projects(token)
    assert taken == staged_before
    assert ingestion_service.get_staged_projects(token) is None
    assert ingestion_service.consume_staged_projects(token) is None


def test_clear_staged_scenarios_drops_entry(
    ingestion_service: ImportIngestionService,
    unit_of_work_factory: Callable[[], UnitOfWork],
) -> None:
    """Clearing a staged scenario import reports True once and False afterwards."""
    with unit_of_work_factory() as uow:
        assert uow.projects is not None
        uow.projects.create(
            Project(
                name="Project Y",
                location="Chile",
                operation_type=MiningOperationType.OPEN_PIT,
            )
        )

    upload_text = (
        "project_name,name,status\n"
        "Project Y,Scenario 1,Active\n"
    )
    upload = BytesIO(upload_text.encode("utf-8"))

    preview = ingestion_service.preview_scenarios(upload, "scenarios.csv")
    token = preview.stage_token
    assert token is not None

    # The entry exists until it is cleared.
    assert ingestion_service.get_staged_scenarios(token) is not None
    first_clear = ingestion_service.clear_staged_scenarios(token)
    assert first_clear is True

    # After clearing, lookups miss and a second clear has nothing to remove.
    assert ingestion_service.get_staged_scenarios(token) is None
    second_clear = ingestion_service.clear_staged_scenarios(token)
    assert second_clear is False