feat: Implement email sending utilities and templates for job notifications

- Added email_service.py for sending emails with SMTP configuration (a hedged sketch of the assumed send_email interface follows this list).
- Introduced email_templates.py to render job alert email subjects and bodies (its assumed output shape is sketched just before the diff).
- Enhanced scraper.py to extract contact information from job listings.
- Updated settings.js to handle negative keyword input validation.
- Created email.html and email_templates.html for managing email subscriptions and templates in the admin interface.
- Modified base.html to include links for email alerts and templates.
- Expanded user settings.html to allow management of negative keywords.
- Updated utils.py to include functions for retrieving negative keywords and email settings.
- Enhanced job filtering logic to exclude jobs containing negative keywords.
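
email_service.py itself is not part of the excerpt below, so the following is only a minimal sketch of what its send_email helper might look like, built on the standard library smtplib and email.message. The settings keys used here (smtp_host, smtp_port, smtp_user, smtp_password, use_tls, from_address) are assumptions about what get_email_settings() returns, not confirmed names.

import smtplib
from email.message import EmailMessage


def send_email(subject: str, body: str, to: list[str], settings: dict) -> None:
    """Sketch of the send_email helper; key names in `settings` are assumed."""
    msg = EmailMessage()
    msg["Subject"] = subject
    msg["From"] = settings.get("from_address", "job-alerts@localhost")  # assumed key
    msg["To"] = ", ".join(to)
    msg.set_content(body)

    host = settings.get("smtp_host", "localhost")  # assumed key
    port = int(settings.get("smtp_port", 587))     # assumed key
    with smtplib.SMTP(host, port, timeout=30) as smtp:
        if settings.get("use_tls", True):          # assumed key
            smtp.starttls()
        user = settings.get("smtp_user")           # assumed key
        if user:
            smtp.login(user, settings.get("smtp_password", ""))
        smtp.send_message(msg)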
2025-11-28 18:15:08 +01:00
parent 8afb208985
commit 2185a07ff0
23 changed files with 2660 additions and 63 deletions
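
email_templates.py is likewise not shown in the diff. Judging from how the scraper consumes its output (payload.get("subject") and payload.get("body")), render_job_alert_email presumably returns a dict with those two keys; the wording and formatting below are illustrative assumptions only.

def render_job_alert_email(new_jobs: list[dict]) -> dict:
    """Sketch of the job-alert renderer; subject/body wording is assumed."""
    lines = []
    for job in new_jobs:
        lines.append(
            f"- {job.get('title', 'Unknown')} "
            f"({job.get('location', 'n/a')}, {job.get('pay', 'pay not listed')})\n"
            f"  {job.get('url', '')}"
        )
    return {
        "subject": f"{len(new_jobs)} new job(s) found by the scraper",
        "body": "The following new jobs were discovered:\n\n" + "\n".join(lines) + "\n",
    }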


@@ -18,18 +18,67 @@ import time
from web.utils import (
get_base_url,
make_request_with_retry,
now_iso,
get_email_settings,
)
from web.db import get_all_regions, get_all_keywords, seed_regions_keywords_from_listings
from web.email_templates import render_job_alert_email
from web.email_service import send_email
def _negative_match_details(job_data: dict) -> tuple[str, str] | None:
"""Return (keyword, field) when job_data indicates a negative match."""
if not job_data or not job_data.get("is_negative_match"):
return None
keyword = (job_data.get("negative_keyword_match") or "").strip()
field = (job_data.get("negative_match_field")
or "unknown").strip() or "unknown"
if not keyword:
keyword = "unknown keyword"
return keyword, field
def _send_new_job_alert(new_jobs: list[dict]) -> tuple[bool, str]:
"""Send an email alert for newly discovered jobs.
Returns (sent, message) where message explains why mail was skipped.
"""
settings = get_email_settings()
if not settings.get("enabled"):
return False, "email alerts disabled"
recipients = settings.get("recipients", []) or []
if not recipients:
return False, "no recipients configured"
payload = render_job_alert_email(new_jobs)
send_email(
subject=payload.get("subject", "New jobs available"),
body=payload.get("body", ""),
to=recipients,
settings=settings,
)
return True, "sent"
def fetch_listings():
"""Fetch job listings from all regions and keywords."""
"""Fetch job listings from all regions and keywords.
Yields progress messages and returns a dict with:
- discovered: total number of unique job URLs discovered
- new: total number of new jobs added to the database
- by_search: list of dicts, each containing:
- region: region name
- keyword: keyword name
- count: number of jobs fetched for this search
"""
# We'll collect URLs discovered in this run and then remove any DB listings
# not present in this set (treat DB as reflecting current search results).
existing_db_urls = set(row['url'] for row in db_get_all_job_urls())
discovered_urls = set()
new_rows = []
new_jobs = []
search_results = [] # Track count per search
# Ensure regions/keywords master lists exist
try:
@@ -58,13 +107,14 @@ def fetch_listings():
# Build a canonical search identifier for this region+keyword combination.
url = get_base_url().format(region=region, keyword=keyword_name.replace(" ", "+"))
search_page_id = f"search:{region_name}:{keyword_name}"
search_count = 0 # Count jobs for this search
try:
last = get_last_fetch_time(url)
if last is not None:
# skip if fetched within the last 24 hours
# skip if fetched within the last hour
age = datetime.now(
timezone.utc) - (last if last.tzinfo is not None else last.replace(tzinfo=timezone.utc))
if age.total_seconds() < 24 * 3600:
if age.total_seconds() < 1 * 3600:
yield f"Skipping {region_name} + {keyword_name} (fetched {age.seconds//3600}h ago)...\n"
processed += 1
continue
@@ -82,8 +132,18 @@ def fetch_listings():
for row in process_region_keyword(region_name, keyword_name, discovered_urls):
timestamp, region, keyword, title, pay, location, url = row
discovered_urls.add(url)
search_count += 1
if url not in existing_db_urls:
new_rows.append(row)
new_jobs.append({
"timestamp": timestamp,
"region": region,
"keyword": keyword,
"title": title,
"pay": pay,
"location": location,
"url": url,
})
# Upsert or update listing to reflect current search result
upsert_listing(
url=url,
@@ -96,18 +156,29 @@ def fetch_listings():
fetched_from=search_page_id,
fetched_at=datetime.now(timezone.utc),
)
# Record per-search count
search_results.append({
"region": region_name,
"keyword": keyword_name,
"count": search_count
})
yield f"Listing fetch complete: {len(discovered_urls)} discovered, {len(new_rows)} new,\n"
return {"discovered": len(discovered_urls), "new": len(new_rows)}
return {
"discovered": len(discovered_urls),
"new": len(new_rows),
"by_search": search_results,
"new_jobs": new_jobs,
}
def process_job_url(job_url: str, region: str = "", keyword: str = ""):
last = get_last_fetch_time(job_url)
if last is not None:
# skip if fetched within the last 24 hours
# skip if fetched within the last hour
age = datetime.now(
timezone.utc) - (last if last.tzinfo is not None else last.replace(tzinfo=timezone.utc))
if age.total_seconds() < 24 * 3600:
if age.total_seconds() < 1 * 3600:
yield f"Skipping job {job_url} (fetched {age.seconds//3600}h ago)...\n"
return None
@@ -124,10 +195,17 @@ def process_job_url(job_url: str, region: str = "", keyword: str = ""):
yield f"Scraping job data from {job_url}\n"
job_data = scrape_job_page(content, job_url)
if job_data:
negative_info = _negative_match_details(job_data)
if negative_info:
keyword, field = negative_info
yield (
f"Skipping job {job_id} due to negative keyword "
f"'{keyword}' in {field}\n"
)
remove_job(job_url)
return None
yield f"Upserting job details for {job_id}\n"
upsert_job_details(job_data, region=region, keyword=keyword)
upsert_user_interaction(
job_id, seen_at=datetime.now(timezone.utc).isoformat())
yield f"Successfully processed job {job_id}: {job_data.get('title', 'Unknown')}\n"
return job_data
else:
@@ -146,8 +224,29 @@ def scraper():
# First, fetch current listings from search pages and make DB reflect them.
yield "Fetching listings...\n"
for message in fetch_listings():
yield message
listing_summary: dict | None = None
fetch_iter = fetch_listings()
try:
while True:
message = next(fetch_iter)
yield message
except StopIteration as stop:
listing_summary = stop.value if isinstance(stop.value, dict) else {}
new_jobs = []
if listing_summary:
new_jobs = listing_summary.get("new_jobs", []) or []
if new_jobs:
yield f"Preparing email alert for {len(new_jobs)} new jobs...\n"
try:
sent, info = _send_new_job_alert(new_jobs)
if sent:
yield "Job alert email sent.\n"
else:
yield f"Skipping email alert: {info}\n"
except Exception as exc:
yield f"Failed to send job alert email: {exc}\n"
# Finally, fetch and refresh individual job pages for current listings
job_urls = db_get_all_job_urls()