remove caching

This commit is contained in:
georg.sinn-schirwitz
2025-09-08 14:44:46 +02:00
parent f8e23d0fba
commit 042a196718
13 changed files with 144 additions and 525 deletions

View File

@@ -1,66 +0,0 @@
import os
import tempfile
import pytest
import web.db as db
from web.utils import get_cache_dir
# Opt-in guard: skip the whole module unless RUN_DB_TESTS is set to a
# non-empty value (a MySQL integration database is expected).
if not os.environ.get("RUN_DB_TESTS"):
    pytest.skip(
        "Set RUN_DB_TESTS=1 to run cache path integration tests",
        allow_module_level=True,
    )
def test_db_sync_inserts_relative_paths(tmp_path, monkeypatch):
    """Check that syncing a cache directory stores cache-relative file paths.

    A single HTML file is written into a temporary cache directory, the
    directory is synced through ``db.db_sync_cached_pages`` and the rows
    returned by ``db.db_get_all_cached_pages`` are searched for the file's
    path relative to that directory.
    """
    # arrange: create a temporary cache dir and a fake html file
    cache_dir = tmp_path / "cache"
    cache_dir.mkdir()
    page = cache_dir / "example.org_path_to_page_123.html"
    page.write_text("<html>ok</html>")
    cache_root = str(cache_dir)

    # point app at this cache dir
    monkeypatch.setenv("PYTEST_CACHE_DIR", cache_root)
    # Patch the helper where it is defined ...
    monkeypatch.setattr('web.utils.get_cache_dir', lambda: cache_root)
    # ... and on web.db as well: if web.db imported the function by name, the
    # patch above would not reach it. raising=False keeps this harmless when
    # web.db exposes no such attribute.
    monkeypatch.setattr(
        'web.db.get_cache_dir', lambda: cache_root, raising=False)

    # ensure DB initialized
    db.db_init()

    # act
    db.db_sync_cached_pages(cache_root)

    # assert: DB contains relative path, not absolute
    expected = os.path.relpath(str(page), start=cache_root)
    rows = db.db_get_all_cached_pages()
    assert any(row['file_path'] == expected for row in rows)
def test_normalize_cached_page_paths_converts_absolute(tmp_path, monkeypatch):
    """Check that normalization rewrites an absolute cached path as relative.

    A row holding an absolute ``file_path`` is inserted directly (simulating
    legacy data); after ``db.normalize_cached_page_paths()`` at least one row
    must have been changed and the cache-relative path must be present.
    """
    cache_dir = tmp_path / "cache"
    cache_dir.mkdir()
    # create an actual file
    page = cache_dir / "site_example_page_1.html"
    page.write_text("<html>ok</html>")
    cache_root = str(cache_dir)

    # normalize_cached_page_paths() takes no directory argument, so the cache
    # dir has to come from get_cache_dir. Patch it where it is defined and on
    # web.db too, in case web.db holds its own by-name reference; raising=False
    # keeps the second patch harmless if web.db exposes no such attribute.
    monkeypatch.setattr('web.utils.get_cache_dir', lambda: cache_root)
    monkeypatch.setattr(
        'web.db.get_cache_dir', lambda: cache_root, raising=False)
    db.db_init()

    abs_fp = str(page)
    rel_fp = os.path.relpath(abs_fp, start=cache_root)

    # Insert an absolute path row directly (simulate legacy data)
    with db._ensure_session() as session:
        session.execute(
            db.text("INSERT INTO cached_pages(file_path, url_guess, last_modified, size_bytes, job_id) VALUES(:fp, :ug, :lm, :sz, :jid)"),
            {"fp": abs_fp, "ug": "https://example.org/page1.html",
             "lm": None, "sz": 10, "jid": None}
        )
        session.commit()

    # normalize should convert absolute to relative
    changed = db.normalize_cached_page_paths()
    assert changed >= 1
    rows = db.db_get_all_cached_pages()
    assert any(row['file_path'] == rel_fp for row in rows)

View File

@@ -1,53 +0,0 @@
import os
import tempfile
from web.app import app
def test_cached_route_serves_file(monkeypatch):
    """Check that ``/cached/<id>`` returns the cached HTML file for a job.

    A temporary HTML file is created in the ``cache`` directory next to this
    test package, ``web.app.get_job_by_id`` is replaced with a stub returning
    a job record that points at the file, and the route is requested through
    the app's test client. The temporary file is removed even when the
    request or an assertion fails.
    """
    # Create a temporary file in the configured cache dir
    cache_dir = os.path.abspath(os.path.join(
        os.path.dirname(__file__), '..', 'cache'))
    os.makedirs(cache_dir, exist_ok=True)
    fd, tmp_path = tempfile.mkstemp(
        prefix='test_cached_', suffix='.html', dir=cache_dir)
    os.close(fd)

    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            f.write('<html><body>cached</body></html>')

        # Fake job record returned by get_job_by_id
        fake_job = {
            'id': 'fake123',
            'job_id': 'fake123',
            'file_path': os.path.relpath(tmp_path, cache_dir),
            'file_path_abs': tmp_path,
        }

        def fake_get_job_by_id(jid):
            # Only the known fake id resolves; anything else is "not found".
            return fake_job if str(jid) == 'fake123' else {}

        # Patch the symbol imported into web.app
        monkeypatch.setattr('web.app.get_job_by_id', fake_get_job_by_id)

        # Request route
        client = app.test_client()
        res = client.get('/cached/fake123')
        assert res.status_code == 200
        assert b'cached' in res.data
    finally:
        # Best-effort cleanup: runs on failure too, so the real cache
        # directory is not littered with leftover test files.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
def test_cached_route_missing(monkeypatch):
    """Check that ``/cached/<id>`` answers 404 when the job lookup is empty."""
    # Every lookup yields an empty record, whatever id is requested.
    monkeypatch.setattr('web.app.get_job_by_id', lambda jid: {})

    response = app.test_client().get('/cached/nope')

    assert response.status_code == 404

View File

@@ -1,27 +0,0 @@
import os
from web.db import CachedPage
from web.utils import get_cache_dir
def test_cachedpage_abs_path(tmp_path, monkeypatch):
    """Check ``CachedPage.abs_path`` for a relative and for a missing path.

    With ``web.db.get_cache_dir`` patched to a temporary directory, a page
    whose ``file_path`` is ``subdir/test.html`` must report that path joined
    onto the absolute cache directory; a page whose ``file_path`` is ``None``
    must report ``None``.
    """
    # Build a throwaway cache directory for the patched lookup to return.
    cache_root = tmp_path / 'cache'
    cache_root.mkdir()
    monkeypatch.setenv('PYTHONIOENCODING', 'utf-8')
    # Patch the symbol used by CachedPage.abs_path (imported into web.db)
    monkeypatch.setattr('web.db.get_cache_dir', lambda: str(cache_root))

    # Page with a relative file path: abs_path is rooted in the cache dir.
    with_path = CachedPage()
    with_path.file_path = 'subdir/test.html'
    cache_root_abs = os.path.abspath(str(cache_root))
    assert with_path.abs_path == os.path.join(
        cache_root_abs, 'subdir/test.html')

    # Page without a file path: abs_path is None.
    without_path = CachedPage()
    without_path.file_path = None
    assert without_path.abs_path is None

View File

@@ -95,39 +95,6 @@ def test_upsert_listing_details_and_urls(db_ready):
pass
def test_cached_page_upsert_and_get(db_ready):
    """Upsert a cached page and look it up again by its guessed URL.

    A listing is upserted first, then a cached-page row for the same URL.
    If ``db.db_get_cache_url`` returns a row, its ``url_guess`` must equal the
    URL. Cleanup is attempted whether or not the checks pass, so a failing
    run does not leave test rows behind.
    """
    jid_suffix = unique_suffix()
    url = f"https://example.org/it/{jid_suffix}.html"
    # Ensure a listing exists for FK relation if enforced
    db.upsert_listing(
        url=url,
        region="it",
        keyword="cache",
        title=f"IT Cache {jid_suffix}",
        pay="N/A",
        location="Test City",
        timestamp=now_iso(),
    )
    fp = f"/tmp/integration_{jid_suffix}.html"
    try:
        db.upsert_cached_page(
            file_path=fp,
            url_guess=url,
            last_modified=now_iso(),
            size_bytes=123,
            job_id=int(jid_suffix) if jid_suffix.isdigit() else None,
        )
        row = db.db_get_cache_url(url)
        if row is not None:
            assert row["url_guess"] == url
    finally:
        # Best-effort cleanup: errors are deliberately ignored so that a
        # cleanup problem never masks the real test outcome.
        try:
            db.remove_cached_page(fp)
            db.db_remove_cached_url(url)
            db.db_delete_job(jid_suffix)
        except Exception:
            pass
def test_user_interactions_mark_and_visit(db_ready):
uname = f"it_user_{unique_suffix()}"
db.create_or_update_user(uname, is_active=True)