# File: calminer/tests/test_import_parsing.py
#
# 143 lines, 4.1 KiB, Python

from __future__ import annotations
from io import BytesIO
from textwrap import dedent
import pandas as pd
import pytest
from services.importers import ImportResult, load_project_imports, load_scenario_imports
from schemas.imports import ProjectImportRow, ScenarioImportRow
from models.project import MiningOperationType
def test_load_project_imports_from_csv() -> None:
    """Load a two-row project CSV and check both rows parse without errors.

    The first data row must be reported as row 2 and expose an operation
    type whose ``.value`` is ``"open_pit"``; the second must be reported as
    row 3 with its empty ``location`` cell turned into ``None``.
    """
    payload = BytesIO(
        (
            "name,location,operation_type,description\n"
            "Project A,Chile,open pit,First project\n"
            "Project B,,underground,Second project\n"
        ).encode("utf-8")
    )

    outcome = load_project_imports(payload, "projects.csv")

    assert isinstance(outcome, ImportResult)
    assert len(outcome.rows) == 2
    assert not outcome.errors

    # Exactly two rows were asserted above, so positional access is safe.
    row_a = outcome.rows[0]
    assert row_a.row_number == 2
    assert isinstance(row_a.data, ProjectImportRow)
    assert row_a.data.name == "Project A"
    assert row_a.data.operation_type.value == "open_pit"

    row_b = outcome.rows[1]
    assert row_b.row_number == 3
    assert isinstance(row_b.data, ProjectImportRow)
    assert row_b.data.location is None
def test_load_scenario_imports_from_excel() -> None:
    """Load one scenario from an in-memory Excel workbook.

    Checks that the single row is reported as row 2 and that the loader
    returns ``"Active"`` as a status with value ``"active"``, ``"usd"`` as
    ``"USD"``, and ``"7.5%"`` as a discount rate of approximately 7.5.
    """
    record = {
        "project_name": "Project A",
        "name": "Scenario 1",
        "status": "Active",
        "start_date": "2025-01-01",
        "end_date": "2025-12-31",
        "discount_rate": "7.5%",
        "currency": "usd",
        "primary_resource": "Electricity",
    }
    workbook = BytesIO()
    pd.DataFrame([record]).to_excel(workbook, index=False)
    # Rewind so the loader reads the workbook from its first byte.
    workbook.seek(0)

    outcome = load_scenario_imports(workbook, "scenarios.xlsx")

    assert len(outcome.rows) == 1
    assert not outcome.errors

    parsed = outcome.rows[0]
    assert parsed.row_number == 2
    assert isinstance(parsed.data, ScenarioImportRow)
    assert parsed.data.status.value == "active"
    assert parsed.data.currency == "USD"
    assert parsed.data.discount_rate == pytest.approx(7.5)
def test_import_errors_include_row_numbers() -> None:
    """Check that a blank ``name`` cell is reported against its row.

    The single data row has an empty name, so no rows should load and the
    one error must point at row 2, field ``name``, with a message that
    contains ``"required"``.
    """
    payload = BytesIO("name,operation_type\n,open pit\n".encode("utf-8"))

    outcome = load_project_imports(payload, "projects.csv")

    assert len(outcome.rows) == 0
    assert len(outcome.errors) == 1

    failure = outcome.errors[0]
    assert failure.row_number == 2
    assert failure.field == "name"
    assert "required" in failure.message
def test_project_import_handles_missing_columns() -> None:
    """Check that a CSV lacking the ``operation_type`` column is rejected.

    Only a ``name`` column is supplied; the loader is expected to return no
    rows and a single error for row 2 attributed to ``operation_type``.
    """
    payload = BytesIO("name\nProject Only\n".encode("utf-8"))

    outcome = load_project_imports(payload, "projects.csv")

    assert outcome.rows == []
    assert len(outcome.errors) == 1

    failure = outcome.errors[0]
    assert failure.row_number == 2
    assert failure.field == "operation_type"
def test_project_import_rejects_invalid_operation_type() -> None:
    """Check that an unrecognised operation type produces a field error.

    The value ``unknown`` must keep the row out of the result and yield
    exactly one error for row 2 on the ``operation_type`` field.
    """
    payload = BytesIO("name,operation_type\nProject X,unknown\n".encode("utf-8"))

    outcome = load_project_imports(payload, "projects.csv")

    assert len(outcome.rows) == 0
    assert len(outcome.errors) == 1

    failure = outcome.errors[0]
    assert failure.row_number == 2
    assert failure.field == "operation_type"
def test_scenario_import_flags_invalid_dates() -> None:
    """Check that a scenario whose end date precedes its start is rejected.

    The error is expected on row 2 with ``field`` set to ``None``, i.e. it
    is not attributed to a single column.
    """
    # dedent() removes the common indentation and strip() drops the blank
    # first/last lines, leaving just the header and the single data row.
    csv_text = dedent(
        """
        project_name,name,status,start_date,end_date
        Project A,Scenario Reverse,Draft,2025-12-31,2025-01-01
        """
    ).strip()
    payload = BytesIO(csv_text.encode("utf-8"))

    outcome = load_scenario_imports(payload, "scenarios.csv")

    assert len(outcome.rows) == 0
    assert len(outcome.errors) == 1

    failure = outcome.errors[0]
    assert failure.row_number == 2
    assert failure.field is None
def test_scenario_import_handles_large_dataset() -> None:
    """Import a 500-row scenario CSV and check every row loads cleanly.

    The CSV is generated in memory with pandas and contains only the
    ``project_name``, ``name`` and ``status`` columns. The test passes when
    all rows are returned and no import errors are reported.
    """
    row_count = 500
    frame = pd.DataFrame(
        {
            "project_name": ["Project"] * row_count,
            "name": [f"Scenario {i}" for i in range(row_count)],
            "status": ["draft"] * row_count,
        }
    )
    buffer = BytesIO()
    frame.to_csv(buffer, index=False)
    # Rewind so the loader reads the CSV from its first byte.
    buffer.seek(0)

    result = load_scenario_imports(buffer, "bulk.csv")

    assert len(result.rows) == row_count
    # A full row count alone would not reveal spurious error entries, so
    # also require that nothing was reported as an error.
    assert not result.errors