From 94730439a2bd4d80f233e66308446226e6839b65 Mon Sep 17 00:00:00 2001 From: zwitschi Date: Wed, 17 Sep 2025 15:35:12 +0200 Subject: [PATCH] extending logging to individual jobs --- web/db.py | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/web/db.py b/web/db.py index 7a705bf..2d61576 100644 --- a/web/db.py +++ b/web/db.py @@ -273,11 +273,32 @@ def get_last_fetch_time(page_url: str) -> Optional[datetime]: def upsert_job_details(job_data: Dict[str, Any]): - """Upsert into job_descriptions table using scraped job details dict.""" + """Upsert into job_descriptions table using scraped job details dict. + + Behavior additions: + - If the provided job `url` has a log entry with fetched_at less than 24 hours ago, + the function will skip updating to avoid unnecessary work. + - On successful upsert, a log entry is recorded with `insert_log(url, ...)`. + """ url = job_data.get("url") job_id = normalize_job_id(job_data.get("id"), url) if not job_id: return + # Skip if job page was fetched recently (24 hours) + try: + if isinstance(url, str) and url: + last = get_last_fetch_time(url) + if last is not None: + # normalize tz-awareness + from datetime import timezone as _tz + now = datetime.now(_tz.utc) + last_dt = last if getattr( + last, 'tzinfo', None) is not None else last.replace(tzinfo=_tz.utc) + if (now - last_dt).total_seconds() < 24 * 3600: + return + except Exception: + # if log lookup fails, proceed normally + pass title = job_data.get("title") or None company = job_data.get("company") or None location = job_data.get("location") or None @@ -297,6 +318,13 @@ def upsert_job_details(job_data: Dict[str, Any]): setattr(obj, "posted_time", posted_time) setattr(obj, "url", url) session.commit() + # Record that we fetched/updated this job page + try: + if isinstance(url, str) and url: + insert_log(url, region=None, keyword=None, + fetched_at=datetime.now()) + except Exception: + pass def db_get_keywords() -> List[str]: @@ -543,7 +571,11 @@ def delete_user_by_id(user_id: int) -> bool: result = session.execute( text("DELETE FROM users WHERE user_id = :u"), {"u": user_id}) session.commit() - return result.rowcount > 0 + rc = getattr(result, 'rowcount', None) + if rc is None: + # Unable to determine rowcount; assume success if no exception + return True + return rc > 0 # ---------------- Regions/Keywords helpers ---------------------------------