diff --git a/.gitignore b/.gitignore
index ebe7096..0e20687 100644
--- a/.gitignore
+++ b/.gitignore
@@ -166,3 +166,4 @@ cython_debug/
 docs/online.md
 .github/copilot*
 .github/TODO.md
+.vscode/launch.json
diff --git a/web/craigslist.py b/web/craigslist.py
index 0379cef..3bf66c8 100644
--- a/web/craigslist.py
+++ b/web/craigslist.py
@@ -175,10 +175,10 @@ def fetch_listings():
 def process_job_url(job_url: str, region: str = "", keyword: str = ""):
     last = get_last_fetch_time(job_url)
     if last is not None:
-        # skip if fetched within the last hour
+        # skip if fetched within the last 24 hours
         age = datetime.now(
             timezone.utc) - (last if last.tzinfo is not None else last.replace(tzinfo=timezone.utc))
-        if age.total_seconds() < 1 * 3600:
+        if age.total_seconds() < 24 * 3600:
             yield f"Skipping job {job_url} (fetched {age.seconds//3600}h ago)...\n"
             return None
 
diff --git a/web/scraper.py b/web/scraper.py
index 6d736a5..fe31d8a 100644
--- a/web/scraper.py
+++ b/web/scraper.py
@@ -224,7 +224,7 @@ def scrape_job_data(content: str, region: str, keyword: str, seen_urls: Set[str]
 def process_region_keyword(region: str, keyword: str, seen_urls: Set[str]) -> List[List]:
     """Process a single region and keyword."""
     url = get_base_url().format(region=region, keyword=keyword.replace(" ", "+"))
-    content = make_request_with_retry(url, 3)
+    content = make_request_with_retry(url, 1)
     if content is None:
         return []
     return scrape_job_data(content, region, keyword, seen_urls)