Extend abs_path resolution in the db layer and add test coverage

This commit is contained in:
georg.sinn-schirwitz
2025-08-30 13:06:16 +02:00
parent f899439f6a
commit 8ad52563fa
3 changed files with 103 additions and 7 deletions

View File

@@ -0,0 +1,53 @@
import os
import tempfile
from web.app import app
def test_cached_route_serves_file(monkeypatch):
    """GET ``/cached/fake123`` responds 200 with the cached file's contents.

    A temporary HTML file is written into the ``cache`` directory one level
    above this test file, ``web.app.get_job_by_id`` is replaced with a stub
    that returns a record pointing at that file, and the route is requested
    through Flask's test client.

    The temporary file is removed in a ``finally`` block so that a failing
    assertion does not leave stray files behind in the cache directory.
    """
    # Cache dir is resolved relative to this test file
    # (presumably the same directory the app is configured to use -- confirm).
    cache_dir = os.path.abspath(os.path.join(
        os.path.dirname(__file__), '..', 'cache'))
    os.makedirs(cache_dir, exist_ok=True)
    fd, tmp_path = tempfile.mkstemp(
        prefix='test_cached_', suffix='.html', dir=cache_dir)
    try:
        # Wrap the descriptor returned by mkstemp instead of closing it and
        # reopening the path a second time.
        with os.fdopen(fd, 'w', encoding='utf-8') as f:
            f.write('<html><body>cached</body></html>')

        # Fake job record returned by get_job_by_id
        fake_job = {
            'id': 'fake123',
            'job_id': 'fake123',
            'file_path': os.path.relpath(tmp_path, cache_dir),
            'file_path_abs': tmp_path,
        }

        def fake_get_job_by_id(jid):
            if str(jid) == 'fake123':
                return fake_job
            return {}

        # Patch the symbol imported into web.app
        monkeypatch.setattr('web.app.get_job_by_id', fake_get_job_by_id)

        # Request route
        client = app.test_client()
        res = client.get('/cached/fake123')
        assert res.status_code == 200
        assert b'cached' in res.data
    finally:
        # Best-effort cleanup: a file that is already gone or still locked
        # must not mask the real test outcome.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
def test_cached_route_missing(monkeypatch):
    """GET ``/cached/nope`` responds 404 when the job lookup returns nothing."""
    # Every lookup yields an empty record, whatever id is asked for.
    monkeypatch.setattr('web.app.get_job_by_id', lambda jid: {})

    response = app.test_client().get('/cached/nope')
    assert response.status_code == 404

View File

@@ -0,0 +1,27 @@
import os
from web.db import CachedPage
from web.utils import get_cache_dir
def test_cachedpage_abs_path(tmp_path, monkeypatch):
    """``CachedPage.abs_path`` joins ``file_path`` onto the cache directory.

    ``web.db.get_cache_dir`` is patched to return a throwaway directory under
    pytest's ``tmp_path``; a falsy ``file_path`` must produce ``None``.
    """
    # Throwaway cache directory for this test only
    cache_root = tmp_path / 'cache'
    cache_root.mkdir()
    cache_root_str = str(cache_root)

    monkeypatch.setenv('PYTHONIOENCODING', 'utf-8')
    # CachedPage.abs_path resolves get_cache_dir through web.db, so that is
    # the name that has to be replaced.
    monkeypatch.setattr('web.db.get_cache_dir', lambda: cache_root_str)

    # A page with a relative file_path resolves below the fake cache dir
    page = CachedPage()
    setattr(page, 'file_path', 'subdir/test.html')
    absolute_root = os.path.abspath(cache_root_str)
    expected = os.path.join(absolute_root, 'subdir/test.html')
    assert page.abs_path == expected

    # A page without a file_path has no absolute path at all
    empty_page = CachedPage()
    setattr(empty_page, 'file_path', None)
    assert empty_page.abs_path is None

View File

@@ -117,6 +117,18 @@ class CachedPage(Base):
listing = relationship("JobListing", back_populates="cached_pages") listing = relationship("JobListing", back_populates="cached_pages")
@property
def abs_path(self) -> Optional[str]:
    """Absolute filesystem path of this cached page, or ``None``.

    The DB stores ``file_path`` relative to the configured cache dir; this
    property is the single place where that relative value is resolved, so
    callers can simply read ``cached_page.abs_path``.

    Returns:
        ``file_path`` joined onto the absolute form of ``get_cache_dir()``,
        or ``None`` when ``file_path`` is missing or falsy.
    """
    relative = getattr(self, 'file_path', None)
    if relative:
        cache_root = os.path.abspath(get_cache_dir())
        return os.path.join(cache_root, relative)
    return None
class UserInteraction(Base): class UserInteraction(Base):
__tablename__ = "user_interactions" __tablename__ = "user_interactions"
@@ -323,16 +335,18 @@ def db_get_all_cached_pages() -> List[Dict[str, Any]]:
with _ensure_session() as session: with _ensure_session() as session:
rows = session.execute(text( rows = session.execute(text(
"SELECT file_path, url_guess, last_modified, size_bytes, job_id FROM cached_pages")).fetchall() "SELECT file_path, url_guess, last_modified, size_bytes, job_id FROM cached_pages")).fetchall()
return [ out = []
{ for row in rows:
"file_path": row[0], fp = row[0]
out.append({
"file_path": fp,
"file_path_abs": db_get_cached_abs_path(fp) if fp else None,
"url_guess": row[1], "url_guess": row[1],
"last_modified": row[2], "last_modified": row[2],
"size_bytes": row[3], "size_bytes": row[3],
"job_id": row[4], "job_id": row[4],
} })
for row in rows return out
]
def db_get_cache_url(url: str): def db_get_cache_url(url: str):
@@ -346,8 +360,10 @@ def db_get_cache_url(url: str):
"SELECT file_path, url_guess, last_modified, size_bytes, job_id FROM cached_pages WHERE url_guess = :u"), {"u": url}).fetchone() "SELECT file_path, url_guess, last_modified, size_bytes, job_id FROM cached_pages WHERE url_guess = :u"), {"u": url}).fetchone()
if not row: if not row:
return None return None
fp = row[0]
return { return {
"file_path": row[0], "file_path": fp,
"file_path_abs": db_get_cached_abs_path(fp) if fp else None,
"url_guess": row[1], "url_guess": row[1],
"last_modified": row[2], "last_modified": row[2],
"size_bytes": row[3], "size_bytes": row[3],