Update DB logic and add fetch logging

This commit is contained in:
2025-09-17 15:24:59 +02:00
parent c4a5ed56b5
commit e26dc9c164
3 changed files with 62 additions and 25 deletions

View File

@@ -9,6 +9,8 @@ from web.db import (
db_get_all_job_urls,
db_delete_job,
remove_job,
insert_log,
get_last_fetch_time,
)
# Import utility functions
@@ -51,8 +53,29 @@ def fetch_listings():
keyword_name = keyword.get("name")
if not keyword_name:
continue
# Build a canonical search identifier for this region+keyword combination.
search_page_id = f"search:{region_name}:{keyword_name}"
try:
last = get_last_fetch_time(search_page_id)
if last is not None:
# skip if fetched within the last 24 hours
age = datetime.now(
timezone.utc) - (last if last.tzinfo is not None else last.replace(tzinfo=timezone.utc))
if age.total_seconds() < 24 * 3600:
yield f"Skipping {region_name} + {keyword_name} (fetched {age.seconds//3600}h ago)...\n"
processed += 1
continue
except Exception:
# if the last-fetch-time lookup fails, proceed with the fetch anyway
pass
processed += 1
yield f"Processing {region_name} + {keyword_name} ({processed}/{total_combinations})...\n"
# record that we're fetching this search page now
try:
insert_log(search_page_id, region=region_name,
keyword=keyword_name, fetched_at=datetime.now(timezone.utc))
except Exception:
pass
for row in process_region_keyword(region_name, keyword_name, discovered_urls):
timestamp, region, keyword, title, pay, location, url = row
discovered_urls.add(url)
@@ -67,6 +90,8 @@ def fetch_listings():
pay=pay,
location=location,
timestamp=timestamp,
fetched_from=search_page_id,
fetched_at=datetime.now(timezone.utc),
)
# Remove stale listings: those present in DB but not discovered now.