extending logging
@@ -25,7 +25,7 @@ def fetch_listings():
     """Fetch job listings from all regions and keywords."""
     # We'll collect URLs discovered in this run and then remove any DB listings
     # not present in this set (treat DB as reflecting current search results).
-    existing_db_urls = set(db_get_all_job_urls())
+    existing_db_urls = set(row['url'] for row in db_get_all_job_urls())
     discovered_urls = set()
     new_rows = []

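The switch from bare URL strings to row['url'] suggests db_get_all_job_urls() now returns row objects carrying region and keyword alongside the URL. A minimal sketch of such a helper, assuming a SQLite backend and a jobs(url, region, keyword) table (table and column names are guesses, not taken from the real schema); sqlite3.Row supports both mapping access and tuple unpacking, which matches both call sites touched by this commit:

import sqlite3

def db_get_all_job_urls(db_path: str = "jobs.db"):
    # sqlite3.Row allows row['url'] as well as
    # url, region, keyword = row, so both access
    # patterns in this commit work on the same rows.
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        return conn.execute(
            "SELECT url, region, keyword FROM jobs"
        ).fetchall()
    finally:
        conn.close()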
@@ -98,7 +98,7 @@ def fetch_listings():
     return {"discovered": len(discovered_urls), "new": len(new_rows)}


-def process_job_url(job_url: str):
+def process_job_url(job_url: str, region: str = "", keyword: str = ""):
     try:
         job_id = url_to_job_id(job_url)
         yield f"Fetching job page: {job_url}\n"
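Because region and keyword default to empty strings, existing callers that pass only the URL keep working. A small self-contained demo of that compatibility (the body and URL below are stand-ins, not the real scraper logic):

def process_job_url(job_url: str, region: str = "", keyword: str = ""):
    # Stand-in body: the real function fetches and scrapes the page.
    yield f"Fetching job page: {job_url}\n"

print(list(process_job_url("https://example.com/jobs/1")))                     # old call style
print(list(process_job_url("https://example.com/jobs/1", "emea", "python")))   # new call style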
@@ -113,7 +113,7 @@ def process_job_url(job_url: str):
         job_data = scrape_job_page(content, job_url)
         if job_data:
             yield f"Upserting job details for {job_id}\n"
-            upsert_job_details(job_data)
+            upsert_job_details(job_data, region=region, keyword=keyword)
             upsert_user_interaction(
                 job_id, seen_at=datetime.now(timezone.utc).isoformat())
             yield f"Successfully processed job {job_id}: {job_data.get('title', 'Unknown')}\n"
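Passing region and keyword through to upsert_job_details implies the jobs table gained matching columns. A sketch of one plausible implementation, assuming SQLite and a UNIQUE constraint on jobs.url (all names here are illustrative, not confirmed by the commit):

import sqlite3

def upsert_job_details(job_data: dict, region: str = "", keyword: str = "",
                       db_path: str = "jobs.db"):
    # Assumed schema: jobs(url UNIQUE, title, region, keyword).
    # ON CONFLICT(url) requires that UNIQUE constraint to exist.
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            "INSERT INTO jobs (url, title, region, keyword) "
            "VALUES (:url, :title, :region, :keyword) "
            "ON CONFLICT(url) DO UPDATE SET "
            "  title = excluded.title, region = excluded.region, "
            "  keyword = excluded.keyword",
            {"url": job_data["url"], "title": job_data.get("title", ""),
             "region": region, "keyword": keyword},
        )
        conn.commit()
    finally:
        conn.close()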
@@ -141,9 +141,11 @@ def scraper():
     job_urls = db_get_all_job_urls()
     yield f"Processing {len(job_urls)} job pages...\n"

-    for i, url in enumerate(job_urls, 1):
+    i = 0
+    for url, region, keyword in job_urls:
+        i += 1
         yield f"\n--- Processing job {i}/{len(job_urls)} ---\n"
-        for message in process_job_url(url):
+        for message in process_job_url(job_url=url, region=region, keyword=keyword):
             yield message

     yield "\nScraping completed successfully!\n"
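The manual counter works, but since each row unpacks to three fields, enumerate still fits; an equivalent formulation of the new loop (a style alternative only, not part of the commit):

def scraper_loop(job_urls, process_job_url):
    # Behaviorally identical to the committed loop, with enumerate
    # providing the 1-based counter instead of manual bookkeeping.
    for i, (url, region, keyword) in enumerate(job_urls, 1):
        yield f"\n--- Processing job {i}/{len(job_urls)} ---\n"
        yield from process_job_url(job_url=url, region=region, keyword=keyword)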