"""Seed baseline data for CalMiner in an idempotent manner. Usage examples -------------- ```powershell # Use existing environment variables (or load from setup_test.env.example) python scripts/seed_data.py --currencies --units --defaults # Dry-run to preview actions python scripts/seed_data.py --currencies --dry-run ``` """ from __future__ import annotations import argparse import logging import os from typing import Iterable, Optional import psycopg2 from psycopg2 import errors from psycopg2.extras import execute_values from scripts.setup_database import DatabaseConfig logger = logging.getLogger(__name__) CURRENCY_SEEDS = ( ("USD", "United States Dollar", "USD$", True), ("EUR", "Euro", "EUR", True), ("CLP", "Chilean Peso", "CLP$", True), ("RMB", "Chinese Yuan", "RMB", True), ("GBP", "British Pound", "GBP", True), ("CAD", "Canadian Dollar", "CAD$", True), ("AUD", "Australian Dollar", "AUD$", True), ) MEASUREMENT_UNIT_SEEDS = ( ("tonnes", "Tonnes", "t", "mass", True), ("kilograms", "Kilograms", "kg", "mass", True), ("pounds", "Pounds", "lb", "mass", True), ("liters", "Liters", "L", "volume", True), ("cubic_meters", "Cubic Meters", "m3", "volume", True), ("kilowatt_hours", "Kilowatt Hours", "kWh", "energy", True), ) def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description="Seed baseline CalMiner data") parser.add_argument("--currencies", action="store_true", help="Seed currency table") parser.add_argument("--units", action="store_true", help="Seed unit table") parser.add_argument("--defaults", action="store_true", help="Seed default records") parser.add_argument("--dry-run", action="store_true", help="Print actions without executing") parser.add_argument( "--verbose", "-v", action="count", default=0, help="Increase logging verbosity" ) return parser.parse_args() def _configure_logging(args: argparse.Namespace) -> None: level = logging.WARNING - (10 * min(args.verbose, 2)) logging.basicConfig(level=max(level, logging.INFO), format="%(levelname)s %(message)s") def main() -> None: args = parse_args() run_with_namespace(args) def run_with_namespace( args: argparse.Namespace, *, config: Optional[DatabaseConfig] = None, ) -> None: _configure_logging(args) if not any((args.currencies, args.units, args.defaults)): logger.info("No seeding options provided; exiting") return config = config or DatabaseConfig.from_env() with psycopg2.connect(config.application_dsn()) as conn: conn.autocommit = True with conn.cursor() as cursor: if args.currencies: _seed_currencies(cursor, dry_run=args.dry_run) if args.units: _seed_units(cursor, dry_run=args.dry_run) if args.defaults: _seed_defaults(cursor, dry_run=args.dry_run) def _seed_currencies(cursor, *, dry_run: bool) -> None: logger.info("Seeding currency table (%d rows)", len(CURRENCY_SEEDS)) if dry_run: for code, name, symbol, active in CURRENCY_SEEDS: logger.info("Dry run: would upsert currency %s (%s)", code, name) return execute_values( cursor, """ INSERT INTO currency (code, name, symbol, is_active) VALUES %s ON CONFLICT (code) DO UPDATE SET name = EXCLUDED.name, symbol = EXCLUDED.symbol, is_active = EXCLUDED.is_active """, CURRENCY_SEEDS, ) logger.info("Currency seed complete") def _seed_units(cursor, *, dry_run: bool) -> None: total = len(MEASUREMENT_UNIT_SEEDS) logger.info("Seeding measurement_unit table (%d rows)", total) if dry_run: for code, name, symbol, unit_type, _ in MEASUREMENT_UNIT_SEEDS: logger.info( "Dry run: would upsert measurement unit %s (%s - %s)", code, name, unit_type, ) return try: execute_values( cursor, """ INSERT INTO measurement_unit (code, name, symbol, unit_type, is_active) VALUES %s ON CONFLICT (code) DO UPDATE SET name = EXCLUDED.name, symbol = EXCLUDED.symbol, unit_type = EXCLUDED.unit_type, is_active = EXCLUDED.is_active """, MEASUREMENT_UNIT_SEEDS, ) except errors.UndefinedTable: logger.warning( "measurement_unit table does not exist; skipping unit seeding." ) cursor.connection.rollback() return logger.info("Measurement unit seed complete") def _seed_defaults(cursor, *, dry_run: bool) -> None: logger.info("Seeding default records - not yet implemented") if dry_run: return if __name__ == "__main__": main()