extending logging to individual jobs

This commit is contained in:
2025-09-17 15:35:12 +02:00
parent 39900ea564
commit 94730439a2

View File

@@ -273,11 +273,32 @@ def get_last_fetch_time(page_url: str) -> Optional[datetime]:
def upsert_job_details(job_data: Dict[str, Any]): def upsert_job_details(job_data: Dict[str, Any]):
"""Upsert into job_descriptions table using scraped job details dict.""" """Upsert into job_descriptions table using scraped job details dict.
Behavior additions:
- If the provided job `url` has a log entry with fetched_at less than 24 hours ago,
the function will skip updating to avoid unnecessary work.
- On successful upsert, a log entry is recorded with `insert_log(url, ...)`.
"""
url = job_data.get("url") url = job_data.get("url")
job_id = normalize_job_id(job_data.get("id"), url) job_id = normalize_job_id(job_data.get("id"), url)
if not job_id: if not job_id:
return return
# Skip if job page was fetched recently (24 hours)
try:
if isinstance(url, str) and url:
last = get_last_fetch_time(url)
if last is not None:
# normalize tz-awareness: a naive timestamp is treated as UTC so it can be subtracted from the aware `now`
from datetime import timezone as _tz
now = datetime.now(_tz.utc)
last_dt = last if getattr(
last, 'tzinfo', None) is not None else last.replace(tzinfo=_tz.utc)
if (now - last_dt).total_seconds() < 24 * 3600:
return
except Exception:
# if log lookup fails, proceed normally
pass
title = job_data.get("title") or None title = job_data.get("title") or None
company = job_data.get("company") or None company = job_data.get("company") or None
location = job_data.get("location") or None location = job_data.get("location") or None
@@ -297,6 +318,13 @@ def upsert_job_details(job_data: Dict[str, Any]):
setattr(obj, "posted_time", posted_time) setattr(obj, "posted_time", posted_time)
setattr(obj, "url", url) setattr(obj, "url", url)
session.commit() session.commit()
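# Record that we fetched/updated this job page.
# NOTE(review): fetched_at below is a naive local datetime.now(), while the 24-hour
# skip check in this function treats naive timestamps as UTC — confirm the two agree.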
try:
if isinstance(url, str) and url:
insert_log(url, region=None, keyword=None,
fetched_at=datetime.now())
except Exception:
pass
def db_get_keywords() -> List[str]: def db_get_keywords() -> List[str]:
@@ -543,7 +571,11 @@ def delete_user_by_id(user_id: int) -> bool:
result = session.execute( result = session.execute(
text("DELETE FROM users WHERE user_id = :u"), {"u": user_id}) text("DELETE FROM users WHERE user_id = :u"), {"u": user_id})
session.commit() session.commit()
return result.rowcount > 0 rc = getattr(result, 'rowcount', None)
if rc is None:
# Unable to determine rowcount; assume success if no exception
return True
return rc > 0
# ---------------- Regions/Keywords helpers --------------------------------- # ---------------- Regions/Keywords helpers ---------------------------------