Compare commits
2 Commits
e34e46e19d
...
8ad52563fa
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8ad52563fa | ||
|
|
f899439f6a |
53
tests/test_cached_route.py
Normal file
53
tests/test_cached_route.py
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
from web.app import app
|
||||||
|
|
||||||
|
|
||||||
|
def test_cached_route_serves_file(monkeypatch):
    """GET /cached/<job_id> serves the cached HTML file of a known job.

    A throwaway HTML file is created inside the repository's ``cache``
    directory, ``web.app.get_job_by_id`` is patched to return a job record
    pointing at that file, and the route must answer 200 with the file body.
    The temporary file is removed even when an assertion fails.
    """
    # Create a temporary file in the configured cache dir.
    # NOTE(review): assumes get_cache_dir() resolves to <repo>/cache — confirm.
    cache_dir = os.path.abspath(os.path.join(
        os.path.dirname(__file__), '..', 'cache'))
    os.makedirs(cache_dir, exist_ok=True)
    fd, tmp_path = tempfile.mkstemp(
        prefix='test_cached_', suffix='.html', dir=cache_dir)

    try:
        # Write through the descriptor mkstemp already opened instead of
        # closing it and reopening the file by name.
        with os.fdopen(fd, 'w', encoding='utf-8') as f:
            f.write('<html><body>cached</body></html>')

        # Fake job record returned by get_job_by_id
        fake_job = {
            'id': 'fake123',
            'job_id': 'fake123',
            'file_path': os.path.relpath(tmp_path, cache_dir),
            'file_path_abs': tmp_path,
        }

        def fake_get_job_by_id(jid):
            if str(jid) in ('fake123',):
                return fake_job
            return {}

        # Patch the symbol imported into web.app
        monkeypatch.setattr('web.app.get_job_by_id', fake_get_job_by_id)

        # Request route
        client = app.test_client()
        res = client.get('/cached/fake123')
        assert res.status_code == 200
        assert b'cached' in res.data
    finally:
        # Best-effort cleanup: runs on failure too, so a red test does not
        # leave stray files in the cache dir. Only filesystem errors are
        # ignored (e.g. the file is still held open on Windows).
        try:
            os.remove(tmp_path)
        except OSError:
            pass
|
||||||
|
|
||||||
|
|
||||||
|
def test_cached_route_missing(monkeypatch):
    """GET /cached/<job_id> answers 404 when the job lookup finds nothing."""
    # Every lookup comes back empty, regardless of the id requested.
    monkeypatch.setattr('web.app.get_job_by_id', lambda jid: {})

    response = app.test_client().get('/cached/nope')
    assert response.status_code == 404
|
||||||
27
tests/test_cachedpage_abs_path.py
Normal file
27
tests/test_cachedpage_abs_path.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
import os
|
||||||
|
from web.db import CachedPage
|
||||||
|
from web.utils import get_cache_dir
|
||||||
|
|
||||||
|
|
||||||
|
def test_cachedpage_abs_path(tmp_path, monkeypatch):
    """CachedPage.abs_path joins file_path onto the cache dir, else None."""
    # Stand-in cache directory living under pytest's tmp_path.
    cache_root = tmp_path / 'cache'
    cache_root.mkdir()
    monkeypatch.setenv('PYTHONIOENCODING', 'utf-8')

    # CachedPage.abs_path resolves get_cache_dir through web.db, so that is
    # the symbol that has to be replaced.
    monkeypatch.setattr('web.db.get_cache_dir', lambda: str(cache_root))

    # A page with a relative file_path resolves under the fake cache dir.
    page = CachedPage()
    page.file_path = 'subdir/test.html'
    cache_abs = os.path.abspath(str(cache_root))
    assert page.abs_path == os.path.join(cache_abs, 'subdir/test.html')

    # A falsy file_path yields no absolute path at all.
    empty_page = CachedPage()
    empty_page.file_path = None
    assert empty_page.abs_path is None
|
||||||
36
web/app.py
36
web/app.py
@@ -1,5 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
from flask import Flask, request, jsonify, render_template, redirect, url_for, session, flash
|
from flask import Flask, request, jsonify, render_template, redirect, url_for, session, flash, send_file
|
||||||
from flask_wtf import CSRFProtect
|
from flask_wtf import CSRFProtect
|
||||||
from typing import Dict, List
|
from typing import Dict, List
|
||||||
|
|
||||||
@@ -32,6 +32,7 @@ from web.utils import (
|
|||||||
initialize_users_from_settings,
|
initialize_users_from_settings,
|
||||||
filter_jobs,
|
filter_jobs,
|
||||||
get_job_by_id,
|
get_job_by_id,
|
||||||
|
get_cache_dir,
|
||||||
)
|
)
|
||||||
from web.db import get_all_regions, get_all_keywords
|
from web.db import get_all_regions, get_all_keywords
|
||||||
|
|
||||||
@@ -229,6 +230,39 @@ def job_by_id(job_id):
|
|||||||
return jsonify({"error": "Job not found"}), 404
|
return jsonify({"error": "Job not found"}), 404
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/cached/<job_id>', methods=['GET'])
def serve_cached(job_id):
    """Serve the cached HTML file for a job if available.

    Uses the job record's `file_path_abs` when present, or resolves the DB
    `file_path` via `db_get_cached_abs_path`. The candidate path is resolved
    with symlinks followed and must lie under the configured cache directory
    before the filesystem is consulted, so neither `..` segments nor
    symlinks can escape the cache dir, and the response does not reveal
    whether files outside it exist.

    Returns:
        The file via `send_file` on success, otherwise a `(message, status)`
        tuple: 404 when the job or its cached file is missing, 403 when the
        path falls outside the cache directory, 500 on unexpected errors.
    """
    try:
        # Imported inside the function as in the original code
        # (presumably to avoid a circular import — TODO confirm).
        from web.db import db_get_cached_abs_path

        job = get_job_by_id(job_id)
        if not job:
            return "Job not found", 404

        # Prefer file_path_abs, fall back to resolving the DB-stored file_path
        abs_fp = job.get('file_path_abs') or None
        if not abs_fp:
            db_fp = job.get('file_path')
            abs_fp = db_get_cached_abs_path(db_fp) if db_fp else None
        if not abs_fp:
            return "Cached file not available", 404

        # realpath (not abspath) so a symlink inside the cache dir cannot
        # point the response at a file outside of it.
        cache_dir = os.path.realpath(get_cache_dir())
        abs_fp = os.path.realpath(abs_fp)

        # Ensure the file is inside the cache directory. commonpath raises
        # ValueError when the paths cannot share a prefix (e.g. different
        # drives on Windows); that is a containment failure, not a crash.
        try:
            inside_cache = os.path.commonpath([cache_dir, abs_fp]) == cache_dir
        except ValueError:
            inside_cache = False
        if not inside_cache:
            return "Forbidden", 403

        # Only touch the filesystem once the path is known to be permitted.
        if not os.path.isfile(abs_fp):
            return "Cached file not available", 404

        return send_file(abs_fp)
    except Exception:
        # Top-level boundary for the route: keep the generic 500 response
        # but record the traceback instead of discarding it.
        app.logger.exception("Error serving cached file for job %s", job_id)
        return "Error serving cached file", 500
|
||||||
|
|
||||||
|
|
||||||
@app.route('/jobs/<job_id>/favorite', methods=['POST'])
|
@app.route('/jobs/<job_id>/favorite', methods=['POST'])
|
||||||
def set_favorite(job_id):
|
def set_favorite(job_id):
|
||||||
"""Mark or unmark a job as favorite for a given user.
|
"""Mark or unmark a job as favorite for a given user.
|
||||||
|
|||||||
30
web/db.py
30
web/db.py
@@ -117,6 +117,18 @@ class CachedPage(Base):
|
|||||||
|
|
||||||
listing = relationship("JobListing", back_populates="cached_pages")
|
listing = relationship("JobListing", back_populates="cached_pages")
|
||||||
|
|
||||||
|
@property
def abs_path(self) -> Optional[str]:
    """Absolute filesystem path of this cached page, or ``None``.

    The DB stores `file_path` relative to the configured cache dir; this
    property is the single place where that relative value is joined onto
    the absolute cache directory, so callers can read
    `cached_page.abs_path` directly.
    """
    relative = getattr(self, 'file_path', None)
    if relative:
        cache_root = os.path.abspath(get_cache_dir())
        return os.path.join(cache_root, relative)
    # No (or empty) file_path: there is nothing to resolve.
    return None
|
||||||
|
|
||||||
|
|
||||||
class UserInteraction(Base):
|
class UserInteraction(Base):
|
||||||
__tablename__ = "user_interactions"
|
__tablename__ = "user_interactions"
|
||||||
@@ -323,16 +335,18 @@ def db_get_all_cached_pages() -> List[Dict[str, Any]]:
|
|||||||
with _ensure_session() as session:
|
with _ensure_session() as session:
|
||||||
rows = session.execute(text(
|
rows = session.execute(text(
|
||||||
"SELECT file_path, url_guess, last_modified, size_bytes, job_id FROM cached_pages")).fetchall()
|
"SELECT file_path, url_guess, last_modified, size_bytes, job_id FROM cached_pages")).fetchall()
|
||||||
return [
|
out = []
|
||||||
{
|
for row in rows:
|
||||||
"file_path": row[0],
|
fp = row[0]
|
||||||
|
out.append({
|
||||||
|
"file_path": fp,
|
||||||
|
"file_path_abs": db_get_cached_abs_path(fp) if fp else None,
|
||||||
"url_guess": row[1],
|
"url_guess": row[1],
|
||||||
"last_modified": row[2],
|
"last_modified": row[2],
|
||||||
"size_bytes": row[3],
|
"size_bytes": row[3],
|
||||||
"job_id": row[4],
|
"job_id": row[4],
|
||||||
}
|
})
|
||||||
for row in rows
|
return out
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def db_get_cache_url(url: str):
|
def db_get_cache_url(url: str):
|
||||||
@@ -346,8 +360,10 @@ def db_get_cache_url(url: str):
|
|||||||
"SELECT file_path, url_guess, last_modified, size_bytes, job_id FROM cached_pages WHERE url_guess = :u"), {"u": url}).fetchone()
|
"SELECT file_path, url_guess, last_modified, size_bytes, job_id FROM cached_pages WHERE url_guess = :u"), {"u": url}).fetchone()
|
||||||
if not row:
|
if not row:
|
||||||
return None
|
return None
|
||||||
|
fp = row[0]
|
||||||
return {
|
return {
|
||||||
"file_path": row[0],
|
"file_path": fp,
|
||||||
|
"file_path_abs": db_get_cached_abs_path(fp) if fp else None,
|
||||||
"url_guess": row[1],
|
"url_guess": row[1],
|
||||||
"last_modified": row[2],
|
"last_modified": row[2],
|
||||||
"size_bytes": row[3],
|
"size_bytes": row[3],
|
||||||
|
|||||||
@@ -46,6 +46,11 @@
|
|||||||
<p class="job-posted-time">{{ job['posted_time'] }}</p>
|
<p class="job-posted-time">{{ job['posted_time'] }}</p>
|
||||||
<span class="job-region region-{{ job['region'] }}">{{ job['region'] }}</span>
|
<span class="job-region region-{{ job['region'] }}">{{ job['region'] }}</span>
|
||||||
<span class="job-keyword keyword-{{ job['keyword']|replace(' ', '')|lower }}">{{ job['keyword'] }}</span>
|
<span class="job-keyword keyword-{{ job['keyword']|replace(' ', '')|lower }}">{{ job['keyword'] }}</span>
|
||||||
|
{% if job.get('file_path_abs') or job.get('file_path') %}
|
||||||
|
<div class="job-cached">
|
||||||
|
<a href="{{ url_for('serve_cached', job_id=job.get('id') or job.get('job_id')) }}" target="_blank">Cached</a>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -23,5 +23,13 @@ styles %}{% endblock %} {% block content %}
|
|||||||
>{{ job.title }}</a
|
>{{ job.title }}</a
|
||||||
>
|
>
|
||||||
</p>
|
</p>
|
||||||
|
{% if job.file_path_abs or job.file_path %}
|
||||||
|
<p>
|
||||||
|
<strong>Cached copy:</strong>
|
||||||
|
<a href="{{ url_for('serve_cached', job_id=job.id) }}" target="_blank"
|
||||||
|
>View cached copy</a
|
||||||
|
>
|
||||||
|
</p>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|||||||
Reference in New Issue
Block a user