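Remove caching: always fetch pages with make_request_with_retry and drop the cache helper imports (cache_page, is_cached, get_cached_content)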
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
from datetime import datetime, UTC
|
||||
from bs4 import BeautifulSoup
|
||||
from typing import List, Dict, Set
|
||||
from web.utils import get_base_url, cache_page, safe_get_text, safe_get_attr, is_cached, get_cached_content, make_request_with_retry
|
||||
from web.utils import get_base_url, safe_get_text, safe_get_attr, make_request_with_retry
|
||||
|
||||
|
||||
def scrape_listings_page(listing, region: str, keyword: str, seen_urls: Set[str]) -> List:
|
||||
@@ -108,14 +108,7 @@ def scrape_job_data(content: str, region: str, keyword: str, seen_urls: Set[str]
|
||||
def process_region_keyword(region: str, keyword: str, seen_urls: Set[str]) -> List[List]:
|
||||
"""Process a single region and keyword."""
|
||||
url = get_base_url().format(region=region, keyword=keyword.replace(" ", "+"))
|
||||
if is_cached(url):
|
||||
content = get_cached_content(url)
|
||||
cache_status = "CACHED"
|
||||
else:
|
||||
content = make_request_with_retry(url, 3)
|
||||
if content is None:
|
||||
return []
|
||||
cache_page(url, content)
|
||||
cache_status = "FETCHED"
|
||||
_ = cache_status # no-op to silence unused var
|
||||
content = make_request_with_retry(url, 3)
|
||||
if content is None:
|
||||
return []
|
||||
return scrape_job_data(content, region, keyword, seen_urls)
|
||||
|
||||
Reference in New Issue
Block a user