remove caching
web/utils.py
@@ -93,8 +93,7 @@ def get_paths() -> dict:
    return get_config().get('paths', {})


def get_cache_dir() -> str:
    return get_paths().get('cache_dir', 'cache')



def get_logs_dir() -> str:
@@ -129,9 +128,7 @@ def get_base_url() -> str:
    return get_config().get('scraper', {}).get('base_url', "https://{region}.craigslist.org/search/jjj?query={keyword}&sort=rel")


def ensure_cache_dir():
    """Ensure cache directory exists."""
    os.makedirs(get_cache_dir(), exist_ok=True)



def now_iso() -> str:
@@ -173,10 +170,7 @@ def get_url_from_filename(name: str) -> str:
    return url_guess


def get_cached_content(url: str) -> str:
    """Get cached content for URL."""
    with open(get_cache_path(url), "r", encoding="utf-8") as f:
        return f.read()



def safe_get_text(element, default="N/A"):
@@ -194,51 +188,19 @@ def get_random_delay(min_delay: int = get_min_delay(), max_delay: int = get_max_
    return random.uniform(min_delay, max_delay)


def get_cache_path(url: str) -> str:
    """Get cache file path for URL."""
    return os.path.join(get_cache_dir(), f"{get_filename_from_url(url)}.html")


def cache_page(url: str, content: str):
    """Cache the page content with a timestamp."""
    cache_path = get_cache_path(url)
    with open(cache_path, "w", encoding="utf-8") as f:
        f.write(content)
    # Update the file's modification time to the current time
    os.utime(cache_path, None)


def is_cached(url: str) -> bool:
    """Check if the page is cached and not older than 24 hours."""
    cache_path = get_cache_path(url)
    if not os.path.isfile(cache_path):
        return False

    # Check the file's age if it's a search result page
    if 'search' in url:
        file_age = time.time() - os.path.getmtime(cache_path)
        if file_age > 24 * 3600:  # 24 hours in seconds
            return False

    return True


def is_cache_stale(last_modified: str, days: int = 1) -> bool:
    """Check if the cached page is stale (older than 24 hours)."""
    if not last_modified:
        return True
    last_datetime = datetime.fromisoformat(last_modified)
    file_age = time.time() - last_datetime.timestamp()
    return file_age > days * 24 * 3600  # days in seconds


def delete_cached_page(url: str):
    cache_fp = get_cache_path(url)
    if os.path.exists(cache_fp):
        try:
            os.remove(cache_fp)
        except Exception:
            pass


def get_color_from_string(s: str) -> str:
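
For context, the helpers removed above composed into a simple read-through cache. A minimal sketch of that flow is shown below, assuming a hypothetical fetch_page() downloader that is not part of web/utils.py:

def get_page(url: str) -> str:
    # Illustrative sketch only, built from the removed helpers above.
    ensure_cache_dir()
    if is_cached(url):
        # Serve the locally cached copy if it exists and is fresh enough.
        return get_cached_content(url)
    content = fetch_page(url)  # hypothetical network fetch, not defined in this file
    cache_page(url, content)   # write-through so the next call hits the cache
    return content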