remove caching

This commit is contained in:
georg.sinn-schirwitz
2025-09-08 14:44:46 +02:00
parent f8e23d0fba
commit 042a196718
13 changed files with 144 additions and 525 deletions

View File

@@ -1,7 +1,7 @@
from datetime import datetime, UTC
from bs4 import BeautifulSoup
from typing import List, Dict, Set
from web.utils import get_base_url, cache_page, safe_get_text, safe_get_attr, is_cached, get_cached_content, make_request_with_retry
from web.utils import get_base_url, safe_get_text, safe_get_attr, make_request_with_retry
def scrape_listings_page(listing, region: str, keyword: str, seen_urls: Set[str]) -> List:
@@ -108,14 +108,7 @@ def scrape_job_data(content: str, region: str, keyword: str, seen_urls: Set[str]
def process_region_keyword(region: str, keyword: str, seen_urls: Set[str]) -> List[List]:
    """Process a single region/keyword pair and return the scraped job rows.

    Builds the search URL from the base-URL template (spaces in the keyword
    become '+'), fetches the page with up to 3 retries, and delegates parsing
    to scrape_job_data.

    Args:
        region: Region slug substituted into the base URL.
        keyword: Search keyword; spaces are '+'-encoded for the query string.
        seen_urls: Mutable set of already-seen listing URLs, passed through to
            scrape_job_data for de-duplication.

    Returns:
        A list of job-data rows, or [] when the page could not be fetched.
    """
    url = get_base_url().format(region=region, keyword=keyword.replace(" ", "+"))
    # Caching was removed: always fetch directly. The old cached/FETCHED
    # branch (and its dead `cache_status` variable) is gone, so the URL is
    # requested exactly once per call.
    content = make_request_with_retry(url, 3)
    if content is None:
        # Retries exhausted — treat as "no listings" rather than raising.
        return []
    return scrape_job_data(content, region, keyword, seen_urls)