simplify caching

This commit is contained in:
georg.sinn-schirwitz
2025-08-29 16:31:10 +02:00
parent d5f686bf1e
commit 2dac771d47
4 changed files with 102 additions and 17 deletions

66
tests/test_cache_paths.py Normal file
View File

@@ -0,0 +1,66 @@
import os
import tempfile
import pytest
import web.db as db
from web.utils import get_cache_dir
# These are integration tests that expect a live database (MySQL), so the
# whole module is skipped at collection time unless the RUN_DB_TESTS
# environment variable is set to a non-empty value.
if not os.environ.get("RUN_DB_TESTS"):
    pytest.skip(
        "Set RUN_DB_TESTS=1 to run cache path integration tests",
        allow_module_level=True,
    )
def test_db_sync_inserts_relative_paths(tmp_path, monkeypatch):
    """Check that syncing a cache directory stores cache-relative file paths.

    A single fake HTML file is created in a temporary cache directory, the
    directory is synced into the database, and the stored ``file_path``
    values are inspected: the path relative to the cache directory must be
    present and the absolute path of the same file must not be.
    """
    # Arrange: a throwaway cache directory holding one fake cached page.
    cache_dir = tmp_path / "cache"
    cache_dir.mkdir()
    cached_file = cache_dir / "example.org_path_to_page_123.html"
    cached_file.write_text("<html>ok</html>", encoding="utf-8")

    # Point the app at this cache dir, via the environment and via the
    # web.utils helper.
    monkeypatch.setenv("PYTEST_CACHE_DIR", str(cache_dir))
    # NOTE(review): this replaces the attribute on the web.utils module only.
    # If web.db binds the helper with `from web.utils import get_cache_dir`,
    # it keeps the original function -- confirm the patch target.
    monkeypatch.setattr('web.utils.get_cache_dir', lambda: str(cache_dir))

    # Ensure the DB is initialized before syncing.
    db.db_init()

    # Act
    db.db_sync_cached_pages(str(cache_dir))

    # Assert: the DB contains the relative path, not the absolute one.
    absolute_path = str(cached_file)
    relative_path = os.path.relpath(absolute_path, start=str(cache_dir))
    stored_paths = [row['file_path'] for row in db.db_get_all_cached_pages()]
    assert relative_path in stored_paths
    # The relative name is identical on every run, so a stale row could
    # satisfy the check above; the absolute path is unique to this run's
    # tmp_path and must never have been written.
    assert absolute_path not in stored_paths
def test_normalize_cached_page_paths_converts_absolute(tmp_path, monkeypatch):
    """Check that normalisation rewrites a stored absolute path as relative.

    A row with an absolute ``file_path`` is inserted directly into
    ``cached_pages`` to simulate legacy data. After
    ``db.normalize_cached_page_paths()`` runs, the cache-relative path must
    be present and the absolute path must be gone.
    """
    cache_dir = tmp_path / "cache"
    cache_dir.mkdir()
    # The row must refer to a file that actually exists on disk.
    cached_file = cache_dir / "site_example_page_1.html"
    cached_file.write_text("<html>ok</html>", encoding="utf-8")

    monkeypatch.setattr('web.utils.get_cache_dir', lambda: str(cache_dir))
    db.db_init()

    abs_fp = str(cached_file)
    rel_fp = os.path.relpath(abs_fp, start=str(cache_dir))

    # Insert an absolute-path row directly (simulates legacy data).
    legacy_row = {
        "fp": abs_fp,
        "ug": "https://example.org/page1.html",
        "lm": None,
        "sz": 10,
        "jid": None,
    }
    with db._ensure_session() as session:
        session.execute(
            db.text("INSERT INTO cached_pages(file_path, url_guess, last_modified, size_bytes, job_id) VALUES(:fp, :ug, :lm, :sz, :jid)"),
            legacy_row,
        )
        session.commit()

    # Normalisation should convert the absolute path to a relative one.
    # `changed` is presumably the number of rows rewritten -- confirm
    # against normalize_cached_page_paths.
    changed = db.normalize_cached_page_paths()
    assert changed >= 1

    stored_paths = [row['file_path'] for row in db.db_get_all_cached_pages()]
    assert rel_fp in stored_paths
    # rel_fp is the same on every run, so a leftover row could satisfy the
    # check above; also require that the legacy absolute row is gone.
    assert abs_fp not in stored_paths