Extend logging to individual jobs
This commit is contained in:
36
web/db.py
36
web/db.py
@@ -273,11 +273,32 @@ def get_last_fetch_time(page_url: str) -> Optional[datetime]:
|
|||||||
|
|
||||||
|
|
||||||
def upsert_job_details(job_data: Dict[str, Any]):
|
def upsert_job_details(job_data: Dict[str, Any]):
|
||||||
"""Upsert into job_descriptions table using scraped job details dict."""
|
"""Upsert into job_descriptions table using scraped job details dict.
|
||||||
|
|
||||||
|
Behavior additions:
|
||||||
|
- If the provided job `url` has a log entry with fetched_at less than 24 hours ago,
|
||||||
|
the function will skip updating to avoid unnecessary work.
|
||||||
|
- On successful upsert, a log entry is recorded with `insert_log(url, ...)`.
|
||||||
|
"""
|
||||||
url = job_data.get("url")
|
url = job_data.get("url")
|
||||||
job_id = normalize_job_id(job_data.get("id"), url)
|
job_id = normalize_job_id(job_data.get("id"), url)
|
||||||
if not job_id:
|
if not job_id:
|
||||||
return
|
return
|
||||||
|
# Skip if job page was fetched recently (24 hours)
|
||||||
|
try:
|
||||||
|
if isinstance(url, str) and url:
|
||||||
|
last = get_last_fetch_time(url)
|
||||||
|
if last is not None:
|
||||||
|
# normalize tz-awareness
|
||||||
|
from datetime import timezone as _tz
|
||||||
|
now = datetime.now(_tz.utc)
|
||||||
|
last_dt = last if getattr(
|
||||||
|
last, 'tzinfo', None) is not None else last.replace(tzinfo=_tz.utc)
|
||||||
|
if (now - last_dt).total_seconds() < 24 * 3600:
|
||||||
|
return
|
||||||
|
except Exception:
|
||||||
|
# if log lookup fails, proceed normally
|
||||||
|
pass
|
||||||
title = job_data.get("title") or None
|
title = job_data.get("title") or None
|
||||||
company = job_data.get("company") or None
|
company = job_data.get("company") or None
|
||||||
location = job_data.get("location") or None
|
location = job_data.get("location") or None
|
||||||
@@ -297,6 +318,13 @@ def upsert_job_details(job_data: Dict[str, Any]):
|
|||||||
setattr(obj, "posted_time", posted_time)
|
setattr(obj, "posted_time", posted_time)
|
||||||
setattr(obj, "url", url)
|
setattr(obj, "url", url)
|
||||||
session.commit()
|
session.commit()
|
||||||
|
# Record that we fetched/updated this job page
|
||||||
|
try:
|
||||||
|
if isinstance(url, str) and url:
|
||||||
|
insert_log(url, region=None, keyword=None,
|
||||||
|
fetched_at=datetime.now())
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def db_get_keywords() -> List[str]:
|
def db_get_keywords() -> List[str]:
|
||||||
@@ -543,7 +571,11 @@ def delete_user_by_id(user_id: int) -> bool:
|
|||||||
result = session.execute(
|
result = session.execute(
|
||||||
text("DELETE FROM users WHERE user_id = :u"), {"u": user_id})
|
text("DELETE FROM users WHERE user_id = :u"), {"u": user_id})
|
||||||
session.commit()
|
session.commit()
|
||||||
return result.rowcount > 0
|
rc = getattr(result, 'rowcount', None)
|
||||||
|
if rc is None:
|
||||||
|
# Unable to determine rowcount; assume success if no exception
|
||||||
|
return True
|
||||||
|
return rc > 0
|
||||||
|
|
||||||
# ---------------- Regions/Keywords helpers ---------------------------------
|
# ---------------- Regions/Keywords helpers ---------------------------------
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user