extending logging
@@ -25,7 +25,7 @@ def fetch_listings():
     """Fetch job listings from all regions and keywords."""
     # We'll collect URLs discovered in this run and then remove any DB listings
     # not present in this set (treat DB as reflecting current search results).
-    existing_db_urls = set(db_get_all_job_urls())
+    existing_db_urls = set(row['url'] for row in db_get_all_job_urls())
     discovered_urls = set()
     new_rows = []
 
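For context, the comment above describes a prune-by-set-difference: listings in the DB whose URLs were not rediscovered in this run are treated as stale. A minimal sketch of that step, assuming a hypothetical db_delete_listing(url) helper (the actual cleanup code is outside this hunk):

    existing_db_urls = set(row['url'] for row in db_get_all_job_urls())
    discovered_urls = set()
    # ... the fetch loop populates discovered_urls and new_rows ...
    for stale_url in existing_db_urls - discovered_urls:
        db_delete_listing(stale_url)  # hypothetical helper; not shown in this diff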
@@ -98,7 +98,7 @@ def fetch_listings():
     return {"discovered": len(discovered_urls), "new": len(new_rows)}
 
 
-def process_job_url(job_url: str):
+def process_job_url(job_url: str, region: str = "", keyword: str = ""):
     try:
         job_id = url_to_job_id(job_url)
         yield f"Fetching job page: {job_url}\n"
@@ -113,7 +113,7 @@ def process_job_url(job_url: str):
         job_data = scrape_job_page(content, job_url)
         if job_data:
            yield f"Upserting job details for {job_id}\n"
-            upsert_job_details(job_data)
+            upsert_job_details(job_data, region=region, keyword=keyword)
            upsert_user_interaction(
                job_id, seen_at=datetime.now(timezone.utc).isoformat())
            yield f"Successfully processed job {job_id}: {job_data.get('title', 'Unknown')}\n"
@@ -141,9 +141,9 @@ def scraper():
     job_urls = db_get_all_job_urls()
     yield f"Processing {len(job_urls)} job pages...\n"
 
-    for i, url in enumerate(job_urls, 1):
+    for i, row in enumerate(job_urls, 1):
         yield f"\n--- Processing job {i}/{len(job_urls)} ---\n"
-        for message in process_job_url(url):
+        for message in process_job_url(job_url=row["url"], region=row["region"], keyword=row["keyword"]):
             yield message
 
     yield "\nScraping completed successfully!\n"
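Both process_job_url() and scraper() are generators that yield human-readable progress lines, which is presumably what the commit title's "extending logging" refers to: region and keyword are now threaded through so they can be persisted alongside each job. A minimal driver, assuming the functions are importable from whatever module this first diff belongs to (the file name is not shown):

    # Stream progress messages to stdout; the web app likely streams them
    # into an HTTP response instead (assumption, not shown in this diff).
    for message in scraper():
        print(message, end="")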
web/db.py
@@ -272,7 +272,7 @@ def get_last_fetch_time(page_url: str) -> Optional[datetime]:
     return None
 
 
-def upsert_job_details(job_data: Dict[str, Any]):
+def upsert_job_details(job_data: Dict[str, Any], region: str = "", keyword: str = ""):
     """Upsert into job_descriptions table using scraped job details dict.
 
     Behavior additions:
@@ -381,11 +381,16 @@ ORDER BY d.posted_time DESC
     return jobs
 
 
-def db_get_all_job_urls() -> List[str]:
-    """Return list of job URLs from job_listings."""
+def db_get_all_job_urls() -> List[dict]:
+    """Return list of job URLs from job_listings.
+
+    Returns:
+    - List of dicts with keys: url, region, keyword
+    """
     with _ensure_session() as session:
-        rows = session.execute(text("SELECT url FROM job_listings")).fetchall()
-        return [r[0] for r in rows]
+        rows = session.execute(
+            text("SELECT url, region, keyword FROM job_listings")).fetchall()
+        return [{"url": r[0], "region": r[1], "keyword": r[2]} for r in rows]
 
 
 def db_delete_job(job_id: str | int):
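With the new return shape, callers receive self-describing rows instead of bare URL strings. A quick consumption sketch:

    for row in db_get_all_job_urls():
        print(row["url"], row["region"], row["keyword"])

Returning dicts rather than tuples keeps call sites explicit about which column is which, at the cost of a slightly heavier row object.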