"""Integration tests for cache-relative ``file_path`` values stored via ``web.db``.

The whole module is skipped unless the ``RUN_DB_TESTS`` environment variable
is set to a non-empty value.
"""
import os
import tempfile  # NOTE(review): appears unused in this module - confirm before removing

import pytest

import web.db as db
from web.utils import get_cache_dir  # NOTE(review): appears unused here - confirm

# Skip unless explicitly enabled (MySQL integration expected)
if not os.getenv("RUN_DB_TESTS"):
    pytest.skip(
        "Set RUN_DB_TESTS=1 to run cache path integration tests",
        allow_module_level=True,
    )


def _make_cached_file(tmp_path, filename):
    """Create ``tmp_path / "cache"`` containing a single file named *filename*.

    The file is written with the text ``"ok"``.

    Returns:
        A ``(cache_dir, cached_file)`` pair of path objects.
    """
    cache_dir = tmp_path / "cache"
    cache_dir.mkdir()
    cached_file = cache_dir / filename
    cached_file.write_text("ok")
    return cache_dir, cached_file


def test_db_sync_inserts_relative_paths(tmp_path, monkeypatch):
    """Syncing a cache dir yields a row whose ``file_path`` is cache-relative."""
    # arrange: a temporary cache dir holding one fake html file
    cache_dir, page_file = _make_cached_file(
        tmp_path, "example.org_path_to_page_123.html"
    )
    cache_root = str(cache_dir)

    # point the app at this cache dir
    monkeypatch.setenv("PYTEST_CACHE_DIR", cache_root)
    # Replace get_cache_dir as looked up on web.utils.
    # NOTE(review): if web.db binds the function by name at import time
    # (``from web.utils import get_cache_dir``) this patch would not reach
    # it - confirm against web.db.
    monkeypatch.setattr('web.utils.get_cache_dir', lambda: cache_root)

    # ensure DB initialized
    db.db_init()

    # act
    db.db_sync_cached_pages(cache_root)

    # assert: at least one stored path is the relative form, not the absolute
    expected_path = os.path.relpath(str(page_file), start=cache_root)
    assert any(
        row['file_path'] == expected_path
        for row in db.db_get_all_cached_pages()
    )


def test_normalize_cached_page_paths_converts_absolute(tmp_path, monkeypatch):
    """A row inserted with an absolute path is present as relative after normalizing."""
    # a real file on disk inside the temporary cache dir
    cache_dir, page_file = _make_cached_file(tmp_path, "site_example_page_1.html")
    cache_root = str(cache_dir)

    monkeypatch.setattr('web.utils.get_cache_dir', lambda: cache_root)
    db.db_init()

    absolute_path = str(page_file)
    relative_path = os.path.relpath(absolute_path, start=cache_root)

    # Insert an absolute-path row directly (simulates legacy data)
    insert_stmt = db.text(
        "INSERT INTO cached_pages(file_path, url_guess, last_modified, size_bytes, job_id) VALUES(:fp, :ug, :lm, :sz, :jid)"
    )
    insert_params = {
        "fp": absolute_path,
        "ug": "https://example.org/page1.html",
        "lm": None,
        "sz": 10,
        "jid": None,
    }
    with db._ensure_session() as session:
        session.execute(insert_stmt, insert_params)
        session.commit()

    # normalize should report at least one changed row
    changed = db.normalize_cached_page_paths()
    assert changed >= 1

    # ...and the relative form must now be among the stored paths
    assert any(
        row['file_path'] == relative_path
        for row in db.db_get_all_cached_pages()
    )