feat: Implement email sending utilities and templates for job notifications
Some checks failed
CI/CD Pipeline / test (push) Failing after 4m9s

- Added email_service.py for sending emails with SMTP configuration.
- Introduced email_templates.py to render job alert email subjects and bodies.
- Enhanced scraper.py to extract contact information from job listings.
- Updated settings.js to handle negative keyword input validation.
- Created email.html and email_templates.html for managing email subscriptions and templates in the admin interface.
- Modified base.html to include links for email alerts and templates.
- Expanded user settings.html to allow management of negative keywords.
- Updated utils.py to include functions for retrieving negative keywords and email settings.
- Enhanced job filtering logic to exclude jobs containing negative keywords.
This commit is contained in:
2025-11-28 18:15:08 +01:00
parent 8afb208985
commit 2185a07ff0
23 changed files with 2660 additions and 63 deletions

View File

@@ -18,8 +18,10 @@ from web.db import (
get_user_by_id,
get_user_regions,
get_user_keywords,
get_user_negative_keywords,
set_user_regions,
set_user_keywords,
set_user_negative_keywords,
get_all_regions,
get_all_keywords,
stats_overview,
@@ -30,7 +32,15 @@ from web.db import (
rename_region,
rename_keyword,
change_region_color,
change_keyword_color
change_keyword_color,
subscribe_email,
unsubscribe_email,
list_email_subscriptions,
list_email_templates,
create_email_template,
update_email_template,
delete_email_template,
get_email_template,
)
from web.utils import (
initialize_users_from_settings,
@@ -39,6 +49,7 @@ from web.utils import (
now_iso,
)
from web.db import get_all_regions, get_all_keywords
from web.email_templates import render_job_alert_email
app = Flask(__name__)
app.secret_key = os.environ.get("FLASK_SECRET", "dev-secret-change-me")
@@ -109,24 +120,30 @@ def index():
# Apply user preference filters if no explicit filters provided
selected_region = request.args.get("region")
selected_keyword = request.args.get("keyword")
if not selected_region and session.get('username'):
user_negative_keywords = []
if session.get('username'):
try:
prefs = get_user_regions(session['username'])
if prefs:
# If user has region prefs, filter to them by default
all_jobs = [j for j in all_jobs if j.get(
'region') in set(prefs)]
username = session['username']
if not selected_region:
prefs = get_user_regions(username)
if prefs:
# If user has region prefs, filter to them by default
all_jobs = [j for j in all_jobs if j.get(
'region') in set(prefs)]
if not selected_keyword:
prefs = get_user_keywords(username)
if prefs:
all_jobs = [j for j in all_jobs if j.get(
'keyword') in set(prefs)]
# Always fetch negative keywords for logged-in users
user_negative_keywords = get_user_negative_keywords(username)
except Exception:
pass
if not selected_keyword and session.get('username'):
try:
prefs = get_user_keywords(session['username'])
if prefs:
all_jobs = [j for j in all_jobs if j.get(
'keyword') in set(prefs)]
except Exception:
pass
filtered_jobs = filter_jobs(all_jobs, selected_region, selected_keyword)
filtered_jobs = filter_jobs(
all_jobs, selected_region, selected_keyword, negative_keywords=user_negative_keywords)
return render_template(
"index.html",
@@ -180,23 +197,26 @@ def jobs():
# Respect user preferences when no explicit filters provided
region = request.args.get("region")
keyword = request.args.get("keyword")
if not region and session.get('username'):
user_negative_keywords = []
if session.get('username'):
try:
prefs = get_user_regions(session['username'])
if prefs:
all_jobs = [j for j in all_jobs if j.get(
'region') in set(prefs)]
username = session['username']
if not region:
prefs = get_user_regions(username)
if prefs:
all_jobs = [j for j in all_jobs if j.get(
'region') in set(prefs)]
if not keyword:
prefs = get_user_keywords(username)
if prefs:
all_jobs = [j for j in all_jobs if j.get(
'keyword') in set(prefs)]
user_negative_keywords = get_user_negative_keywords(username)
except Exception:
pass
if not keyword and session.get('username'):
try:
prefs = get_user_keywords(session['username'])
if prefs:
all_jobs = [j for j in all_jobs if j.get(
'keyword') in set(prefs)]
except Exception:
pass
return jsonify(filter_jobs(all_jobs, region, keyword))
return jsonify(filter_jobs(all_jobs, region, keyword, negative_keywords=user_negative_keywords))
@app.route('/job_details', methods=['GET'])
@@ -358,6 +378,130 @@ def admin_user_delete(user_id):
return redirect(url_for('admin_users'))
@app.route('/admin/emails', methods=['GET', 'POST'])
def admin_emails():
    """Admin page for managing email alert subscriptions.

    POST: applies the ``action`` form field (``subscribe``, ``unsubscribe`` or
    ``reactivate``) to the ``email`` form field, reports the outcome through
    ``flash`` and redirects back to this page.
    GET: renders the subscription list together with total/active counts.
    Non-admin callers are redirected to the login page.
    """
    if not require_admin():
        return redirect(url_for('login'))

    if request.method == 'POST':
        action = (request.form.get('action') or '').strip().lower()
        email = (request.form.get('email') or '').strip()
        try:
            if action == 'unsubscribe':
                deactivated = unsubscribe_email(email)
                flash('Subscription deactivated' if deactivated
                      else 'No matching subscription found')
            elif action == 'subscribe':
                subscribe_email(email)
                flash('Subscription saved')
            elif action == 'reactivate':
                # Reactivation goes through the same call as subscribing.
                subscribe_email(email)
                flash('Subscription reactivated')
            else:
                flash('Unknown action')
        except Exception as exc:
            # Covers ValueError (e.g. empty address) as well as anything else.
            flash(f'Error: {exc}')
        return redirect(url_for('admin_emails'))

    class Sub(dict):
        # Lets the template use attribute access (row.email) on dict rows.
        __getattr__ = dict.get

    subscription_rows = [Sub(entry) for entry in list_email_subscriptions()]
    active_rows = [row for row in subscription_rows if row.get('is_active')]
    return render_template(
        'admin/email.html',
        title='Email Subscriptions',
        subscriptions=subscription_rows,
        total_active=len(active_rows),
        total=len(subscription_rows),
    )
@app.route('/admin/email-templates', methods=['GET', 'POST'])
def admin_email_templates():
    """Admin page for creating, editing, deleting and previewing email templates.

    POST: applies the ``action`` form field (``create``, ``update`` or
    ``delete``) using the submitted template fields, reports the outcome
    through ``flash`` and redirects back to this page.
    GET: lists every template (inactive ones included). The ``template_id``
    query argument selects a template to edit; ``preview_id`` renders that
    template against two hard-coded sample jobs.
    Non-admin callers are redirected to the login page.
    """
    if not require_admin():
        return redirect(url_for('login'))

    if request.method == 'POST':
        form = request.form
        action = (form.get('action') or '').strip().lower()
        raw_id = form.get('template_id')
        slug = form.get('slug') or ''
        fields = {
            'name': form.get('name') or '',
            'subject': form.get('subject') or '',
            'body': form.get('body') or '',
            # Checkbox inputs submit the literal string 'on' when ticked.
            'is_active': form.get('is_active') == 'on',
        }
        try:
            if action == 'create':
                create_email_template(slug=slug, **fields)
                flash('Template created')
            elif action == 'update':
                # An empty slug is sent as None so the stored slug is left alone.
                update_email_template(
                    int(raw_id or 0), slug=slug or None, **fields)
                flash('Template updated')
            elif action == 'delete':
                removed = delete_email_template(int(raw_id or 0))
                flash('Template deleted' if removed else 'Template not found')
            else:
                flash('Unknown action')
        except Exception as exc:
            # Covers ValueError (validation, bad id) as well as anything else.
            flash(f'Error: {exc}')
        return redirect(url_for('admin_email_templates'))

    templates = list_email_templates(include_inactive=True)

    edit_id = request.args.get('template_id', type=int)
    editing = get_email_template(edit_id) if edit_id else None

    preview_id = request.args.get('preview_id', type=int)
    preview_template = get_email_template(preview_id) if preview_id else None
    preview_payload = None
    if preview_template:
        sample_jobs = [
            {
                'title': 'Senior Python Engineer',
                'company': 'ACME Corp',
                'location': 'Remote',
                'url': 'https://example.com/jobs/1',
            },
            {
                'title': 'Data Engineer',
                'company': 'Globex',
                'location': 'New York, NY',
                'url': 'https://example.com/jobs/2',
            },
        ]
        preview_payload = render_job_alert_email(
            sample_jobs,
            region='preview-region',
            keyword='preview-keyword',
            template_override=preview_template,
        )

    return render_template(
        'admin/email_templates.html',
        title='Email Templates',
        templates=templates,
        editing=editing,
        preview=preview_payload,
        preview_template=preview_template,
    )
# ---------------- User settings (regions/keywords) -------------------------
@app.route('/settings', methods=['GET', 'POST'])
@@ -369,6 +513,8 @@ def user_settings():
# Accept JSON or form posts. Normalize singular/plural names.
sel_regions: list[str] = []
sel_keywords: list[str] = []
sel_negative_keywords: list[str] = []
if request.is_json:
data = request.get_json(silent=True) or {}
sel_regions = [
@@ -377,16 +523,25 @@ def user_settings():
sel_keywords = [
(v or '').strip() for v in (data.get('keywords') or []) if v and (v or '').strip()
]
sel_negative_keywords = [
(v or '').strip() for v in (data.get('negative_keywords') or []) if v and (v or '').strip()
]
else:
# HTML form fallback: support names 'regions' or 'region', 'keywords' or 'keyword'
r_vals = request.form.getlist(
'regions') + request.form.getlist('region')
k_vals = request.form.getlist(
'keywords') + request.form.getlist('keyword')
nk_vals = request.form.getlist(
'negative_keywords') + request.form.getlist('negative_keyword')
sel_regions = [(v or '').strip()
for v in r_vals if v and (v or '').strip()]
sel_keywords = [(v or '').strip()
for v in k_vals if v and (v or '').strip()]
sel_negative_keywords = [(v or '').strip()
for v in nk_vals if v and (v or '').strip()]
# Upsert any new values into master lists
for r in sel_regions:
try:
@@ -398,9 +553,14 @@ def user_settings():
upsert_keyword(k)
except Exception:
pass
# Negative keywords need no explicit upsert here: set_user_negative_keywords
# calls upsert_negative_keyword for each name it is given.
try:
set_user_regions(username, sel_regions)
set_user_keywords(username, sel_keywords)
set_user_negative_keywords(username, sel_negative_keywords)
# For JSON callers, return 200 without redirect
if request.is_json:
return jsonify({"status": "ok"})
@@ -415,6 +575,8 @@ def user_settings():
all_keywords = get_all_keywords()
user_regions = get_user_regions(username)
user_keywords = get_user_keywords(username)
user_negative_keywords = get_user_negative_keywords(username)
return render_template(
'user/settings.html',
title='Your Preferences',
@@ -422,6 +584,7 @@ def user_settings():
all_keywords=all_keywords,
user_regions=user_regions,
user_keywords=user_keywords,
user_negative_keywords=user_negative_keywords,
)

View File

@@ -18,18 +18,67 @@ import time
from web.utils import (
get_base_url,
make_request_with_retry,
now_iso,
get_email_settings,
)
from web.db import get_all_regions, get_all_keywords, seed_regions_keywords_from_listings
from web.email_templates import render_job_alert_email
from web.email_service import send_email
def _negative_match_details(job_data: dict) -> tuple[str, str] | None:
"""Return (keyword, field) when job_data indicates a negative match."""
if not job_data or not job_data.get("is_negative_match"):
return None
keyword = (job_data.get("negative_keyword_match") or "").strip()
field = (job_data.get("negative_match_field")
or "unknown").strip() or "unknown"
if not keyword:
keyword = "unknown keyword"
return keyword, field
def _send_new_job_alert(new_jobs: list[dict]) -> tuple[bool, str]:
    """Email an alert describing newly discovered jobs.

    Returns ``(sent, message)``. ``sent`` is ``False`` when alerts are
    disabled or no recipients are configured, and ``message`` says which;
    on success the result is ``(True, "sent")``. Exceptions raised while
    rendering or sending propagate to the caller.
    """
    settings = get_email_settings()

    if not settings.get("enabled"):
        return False, "email alerts disabled"

    recipients = settings.get("recipients") or []
    if not recipients:
        return False, "no recipients configured"

    rendered = render_job_alert_email(new_jobs)
    subject = rendered.get("subject", "New jobs available")
    body = rendered.get("body", "")
    # Pass the settings already loaded so send_email does not re-read them.
    send_email(subject=subject, body=body, to=recipients, settings=settings)
    return True, "sent"
def fetch_listings():
"""Fetch job listings from all regions and keywords."""
"""Fetch job listings from all regions and keywords.
Yields progress messages and returns a dict with:
- discovered: total number of unique job URLs discovered
- new: total number of new jobs added to the database
- by_search: list of dicts, each containing:
- region: region name
- keyword: keyword name
- count: number of jobs fetched for this search
"""
# We'll collect URLs discovered in this run and then remove any DB listings
# not present in this set (treat DB as reflecting current search results).
existing_db_urls = set(row['url'] for row in db_get_all_job_urls())
discovered_urls = set()
new_rows = []
new_jobs = []
search_results = [] # Track count per search
# Ensure regions/keywords master lists exist
try:
@@ -58,13 +107,14 @@ def fetch_listings():
# Build a canonical search identifier for this region+keyword combination.
url = get_base_url().format(region=region, keyword=keyword_name.replace(" ", "+"))
search_page_id = f"search:{region_name}:{keyword_name}"
search_count = 0 # Count jobs for this search
try:
last = get_last_fetch_time(url)
if last is not None:
# skip if fetched within the last 24 hours
# skip if fetched within the last hour
age = datetime.now(
timezone.utc) - (last if last.tzinfo is not None else last.replace(tzinfo=timezone.utc))
if age.total_seconds() < 24 * 3600:
if age.total_seconds() < 1 * 3600:
yield f"Skipping {region_name} + {keyword_name} (fetched {age.seconds//3600}h ago)...\n"
processed += 1
continue
@@ -82,8 +132,18 @@ def fetch_listings():
for row in process_region_keyword(region_name, keyword_name, discovered_urls):
timestamp, region, keyword, title, pay, location, url = row
discovered_urls.add(url)
search_count += 1
if url not in existing_db_urls:
new_rows.append(row)
new_jobs.append({
"timestamp": timestamp,
"region": region,
"keyword": keyword,
"title": title,
"pay": pay,
"location": location,
"url": url,
})
# Upsert or update listing to reflect current search result
upsert_listing(
url=url,
@@ -96,18 +156,29 @@ def fetch_listings():
fetched_from=search_page_id,
fetched_at=datetime.now(timezone.utc),
)
# Record per-search count
search_results.append({
"region": region_name,
"keyword": keyword_name,
"count": search_count
})
yield f"Listing fetch complete: {len(discovered_urls)} discovered, {len(new_rows)} new,\n"
return {"discovered": len(discovered_urls), "new": len(new_rows)}
return {
"discovered": len(discovered_urls),
"new": len(new_rows),
"by_search": search_results,
"new_jobs": new_jobs,
}
def process_job_url(job_url: str, region: str = "", keyword: str = ""):
last = get_last_fetch_time(job_url)
if last is not None:
# skip if fetched within the last 24 hours
# skip if fetched within the last hour
age = datetime.now(
timezone.utc) - (last if last.tzinfo is not None else last.replace(tzinfo=timezone.utc))
if age.total_seconds() < 24 * 3600:
if age.total_seconds() < 1 * 3600:
yield f"Skipping job {job_url} (fetched {age.seconds//3600}h ago)...\n"
return None
@@ -124,10 +195,17 @@ def process_job_url(job_url: str, region: str = "", keyword: str = ""):
yield f"Scraping job data from {job_url}\n"
job_data = scrape_job_page(content, job_url)
if job_data:
negative_info = _negative_match_details(job_data)
if negative_info:
keyword, field = negative_info
yield (
f"Skipping job {job_id} due to negative keyword "
f"'{keyword}' in {field}\n"
)
remove_job(job_url)
return None
yield f"Upserting job details for {job_id}\n"
upsert_job_details(job_data, region=region, keyword=keyword)
upsert_user_interaction(
job_id, seen_at=datetime.now(timezone.utc).isoformat())
yield f"Successfully processed job {job_id}: {job_data.get('title', 'Unknown')}\n"
return job_data
else:
@@ -146,8 +224,29 @@ def scraper():
# First, fetch current listings from search pages and make DB reflect them.
yield "Fetching listings...\n"
for message in fetch_listings():
yield message
listing_summary: dict | None = None
fetch_iter = fetch_listings()
try:
while True:
message = next(fetch_iter)
yield message
except StopIteration as stop:
listing_summary = stop.value if isinstance(stop.value, dict) else {}
new_jobs = []
if listing_summary:
new_jobs = listing_summary.get("new_jobs", []) or []
if new_jobs:
yield f"Preparing email alert for {len(new_jobs)} new jobs...\n"
try:
sent, info = _send_new_job_alert(new_jobs)
if sent:
yield "Job alert email sent.\n"
else:
yield f"Skipping email alert: {info}\n"
except Exception as exc:
yield f"Failed to send job alert email: {exc}\n"
# Finally, fetch and refresh individual job pages for current listings
job_urls = db_get_all_job_urls()

420
web/db.py
View File

@@ -5,7 +5,7 @@ from __future__ import annotations
Tables:
- users(user_id PK, username UNIQUE, created_at)
- job_listings(job_id PK, url UNIQUE, region, keyword, title, pay, location, timestamp)
- job_descriptions(job_id PK FK -> job_listings, title, company, location, description, posted_time, url)
- job_descriptions(job_id PK FK -> job_listings, title, company, location, description, posted_time, url, reply_url)
- user_interactions(job_id PK FK -> job_listings, user_id FK -> users, seen_at, url_visited, is_user_favorite)
- regions(region_id PK, name UNIQUE)
- keywords(keyword_id PK, name UNIQUE)
@@ -16,6 +16,7 @@ Tables:
from datetime import datetime, UTC
from typing import Optional, Dict, Any, List
import re
from web.utils import (
get_color_from_string,
url_to_job_id,
@@ -96,10 +97,279 @@ class JobDescription(Base):
description = Column(Text)
posted_time = Column(String(TIME_LEN))
url = Column(String(URL_LEN))
reply_url = Column(String(URL_LEN))
contact_email = Column(String(SHORT_LEN))
contact_phone = Column(String(SHORT_LEN))
contact_name = Column(String(SHORT_LEN))
listing = relationship("JobListing", back_populates="description")
def _normalize_email(value: Optional[str]) -> str:
if not value or not isinstance(value, str):
return ""
return value.strip().lower()
def subscribe_email(email: str) -> bool:
    """Add an email subscription, or reactivate it if the address already exists.

    The address is normalized first. Raises ``ValueError`` when it is empty
    after normalization. Returns ``True`` once the change is committed.
    """
    address = _normalize_email(email)
    if not address:
        raise ValueError("email address required")

    with _ensure_session() as session:
        found = session.execute(
            text(
                "SELECT subscription_id, is_active FROM email_subscriptions WHERE email = :e"
            ),
            {"e": address},
        ).fetchone()
        stamp = datetime.now(UTC)

        if found:
            # Existing row: switch it back on and bump its timestamp.
            statement = (
                "UPDATE email_subscriptions SET is_active = 1, updated_at = :u WHERE subscription_id = :sid"
            )
            params = {"u": stamp, "sid": found[0]}
        else:
            statement = (
                "INSERT INTO email_subscriptions(email, is_active, created_at, updated_at) "
                "VALUES(:e, 1, :u, :u)"
            )
            params = {"e": address, "u": stamp}

        session.execute(text(statement), params)
        session.commit()
    return True
def unsubscribe_email(email: str) -> bool:
    """Deactivate the subscription for ``email``.

    Raises ``ValueError`` when the address is empty after normalization.
    Returns ``True`` if the UPDATE reported at least one affected row,
    ``False`` otherwise (including when no row count is available).
    """
    address = _normalize_email(email)
    if not address:
        raise ValueError("email address required")

    with _ensure_session() as session:
        outcome = session.execute(
            text(
                "UPDATE email_subscriptions SET is_active = 0, updated_at = :u WHERE email = :e"
            ),
            {"u": datetime.now(UTC), "e": address},
        )
        session.commit()
        affected = getattr(outcome, "rowcount", None)
        return affected is not None and affected > 0
def list_email_subscriptions(*, active_only: bool = False) -> List[Dict[str, Any]]:
    """Return email subscriptions as dicts, ordered by email address.

    Each dict has the keys ``subscription_id``, ``email``, ``is_active``
    (coerced to ``bool``), ``created_at`` and ``updated_at``. With
    ``active_only=True`` only rows with ``is_active = 1`` are returned.
    """
    where_clause = " WHERE is_active = 1" if active_only else ""
    sql = (
        "SELECT subscription_id, email, is_active, created_at, updated_at FROM email_subscriptions"
        + where_clause
        + " ORDER BY email"
    )
    bind_params: Dict[str, Any] = {}

    with _ensure_session() as session:
        fetched = session.execute(text(sql), bind_params).fetchall()

    return [
        {
            "subscription_id": record[0],
            "email": record[1],
            "is_active": bool(record[2]),
            "created_at": record[3],
            "updated_at": record[4],
        }
        for record in fetched
    ]
def get_active_email_recipients() -> List[str]:
    """Return the email addresses of all active subscriptions."""
    active_rows = list_email_subscriptions(active_only=True)
    return [entry["email"] for entry in active_rows]
def _normalize_slug(value: Optional[str]) -> str:
if not value:
return ""
slug = re.sub(r"[^a-zA-Z0-9-]+", "-", value.strip().lower())
slug = re.sub(r"-+", "-", slug).strip("-")
return slug
def _template_to_dict(template: EmailTemplate) -> Dict[str, Any]:
    """Convert an ``EmailTemplate`` row into a plain dict.

    ``created_at`` / ``updated_at`` are rendered with ``isoformat()`` when
    they are ``datetime`` instances and passed through unchanged otherwise
    (``None`` when the attribute is absent).
    """
    created_raw = getattr(template, "created_at", None)
    updated_raw = getattr(template, "updated_at", None)

    def _as_text(moment):
        # Only real datetimes are formatted; anything else is returned as-is.
        if isinstance(moment, datetime):
            return moment.isoformat()
        return moment

    payload: Dict[str, Any] = {
        "template_id": template.template_id,
        "slug": template.slug,
        "name": template.name,
        "subject": template.subject,
        "body": template.body,
        "is_active": bool(template.is_active),
    }
    payload["created_at"] = _as_text(created_raw)
    payload["updated_at"] = _as_text(updated_raw)
    return payload
def list_email_templates(*, include_inactive: bool = True) -> List[Dict[str, Any]]:
    """Return email templates as dicts, sorted by name ascending.

    Inactive templates are left out when ``include_inactive`` is false.
    """
    with _ensure_session() as session:
        selection = session.query(EmailTemplate)
        if not include_inactive:
            selection = selection.filter(EmailTemplate.is_active.is_(True))
        ordered = selection.order_by(EmailTemplate.name.asc())
        return [_template_to_dict(entry) for entry in ordered.all()]
def get_email_template(template_id: int) -> Optional[Dict[str, Any]]:
    """Return the template with this primary key as a dict, or ``None``.

    A falsy ``template_id`` short-circuits to ``None`` without a query.
    """
    if not template_id:
        return None
    with _ensure_session() as session:
        found = session.get(EmailTemplate, int(template_id))
        if not found:
            return None
        return _template_to_dict(found)
def get_email_template_by_slug(slug: str) -> Optional[Dict[str, Any]]:
    """Return the template whose slug matches the normalized ``slug``, or ``None``.

    A slug that normalizes to an empty string yields ``None`` without a query.
    """
    wanted = _normalize_slug(slug)
    if not wanted:
        return None
    with _ensure_session() as session:
        lookup = session.query(EmailTemplate).filter(
            EmailTemplate.slug == wanted)
        found = lookup.one_or_none()
        if not found:
            return None
        return _template_to_dict(found)
def create_email_template(
    *,
    name: str,
    subject: str,
    body: str,
    slug: Optional[str] = None,
    is_active: bool = True,
) -> Dict[str, Any]:
    """Create an email template and return it as a dict.

    ``name``, ``subject`` and ``body`` are stripped and must be non-empty.
    The slug is normalized from ``slug`` when given, otherwise from the name.

    Raises:
        ValueError: a required field is blank, the slug normalizes to an
            empty string, or another template already uses the slug.
    """

    def _required(raw, message):
        # Strip the value and reject blanks with the field-specific message.
        cleaned = (raw or "").strip()
        if not cleaned:
            raise ValueError(message)
        return cleaned

    name_clean = _required(name, "Template name is required")
    subject_clean = _required(subject, "Template subject is required")
    body_clean = _required(body, "Template body is required")

    slug_clean = _normalize_slug(slug or name_clean)
    if not slug_clean:
        raise ValueError("Template slug is required")

    with _ensure_session() as session:
        clash = session.query(EmailTemplate).filter(
            EmailTemplate.slug == slug_clean).one_or_none()
        if clash:
            raise ValueError("A template with this slug already exists")

        record = EmailTemplate(
            name=name_clean,
            slug=slug_clean,
            subject=subject_clean,
            body=body_clean,
            is_active=bool(is_active),
        )
        session.add(record)
        session.commit()
        # Reload so database-generated values are present in the result.
        session.refresh(record)
        return _template_to_dict(record)
def update_email_template(
    template_id: int,
    *,
    name: Optional[str] = None,
    subject: Optional[str] = None,
    body: Optional[str] = None,
    slug: Optional[str] = None,
    is_active: Optional[bool] = None,
) -> Dict[str, Any]:
    """Update the given fields of an email template and return it as a dict.

    Arguments left as ``None`` are not touched. Supplied text fields are
    stripped and must be non-empty; a supplied slug is normalized and must
    not belong to a different template.

    Raises:
        ValueError: ``template_id`` is falsy, the template does not exist,
            a supplied field is blank, or the slug is already taken.
    """
    if not template_id:
        raise ValueError("template_id is required")

    text_fields = (
        ("name", name, "Template name is required"),
        ("subject", subject, "Template subject is required"),
        ("body", body, "Template body is required"),
    )

    with _ensure_session() as session:
        record = session.get(EmailTemplate, int(template_id))
        if record is None:
            raise ValueError("Template not found")

        for attr, raw, message in text_fields:
            if raw is None:
                continue
            cleaned = raw.strip()
            if not cleaned:
                raise ValueError(message)
            setattr(record, attr, cleaned)

        if slug is not None:
            slug_clean = _normalize_slug(slug)
            if not slug_clean:
                raise ValueError("Template slug is required")
            # The slug must stay unique across all *other* templates.
            clash = (
                session.query(EmailTemplate)
                .filter(EmailTemplate.slug == slug_clean, EmailTemplate.template_id != record.template_id)
                .one_or_none()
            )
            if clash:
                raise ValueError("A template with this slug already exists")
            setattr(record, "slug", slug_clean)

        if is_active is not None:
            setattr(record, "is_active", bool(is_active))

        record.touch()
        session.commit()
        session.refresh(record)
        return _template_to_dict(record)
def delete_email_template(template_id: int) -> bool:
    """Delete the template with this primary key.

    Returns ``True`` when a row was deleted, ``False`` when ``template_id``
    is falsy or no such template exists.
    """
    if not template_id:
        return False
    with _ensure_session() as session:
        doomed = session.get(EmailTemplate, int(template_id))
        if doomed is not None:
            session.delete(doomed)
            session.commit()
            return True
    return False
def ensure_default_email_template() -> None:
    """Create the ``job-alert`` template if it does not exist yet.

    Subject and body defaults come from ``web.email_templates``; if that
    import fails, built-in fallback strings are used instead. This is
    best-effort: any database error is swallowed.
    """
    try:
        from web.email_templates import (
            DEFAULT_JOB_ALERT_BODY as default_body,
            DEFAULT_JOB_ALERT_SUBJECT as default_subject,
        )
    except Exception:
        default_subject = "{count_label}{scope}"
        default_body = (
            "Hi,\n\n{intro_line}\n{jobs_message}\n\nGenerated at {timestamp} UTC.\n"
            "You are receiving this message because job alerts are enabled.\n"
        )

    try:
        with _ensure_session() as session:
            present = session.query(EmailTemplate).filter(
                EmailTemplate.slug == "job-alert").one_or_none()
            if present is not None:
                return
            session.add(
                EmailTemplate(
                    name="Job Alert",
                    slug="job-alert",
                    subject=default_subject,
                    body=default_body,
                    is_active=True,
                )
            )
            session.commit()
    except Exception:
        # Deliberately silent: a failure here leaves the default template absent.
        pass
class UserInteraction(Base):
__tablename__ = "user_interactions"
# composite uniqueness on (user_id, job_id)
@@ -146,6 +416,20 @@ class UserKeyword(Base):
"keywords.keyword_id", ondelete="CASCADE"), primary_key=True)
class NegativeKeyword(Base):
    """A negative keyword, stored once in the ``negative_keywords`` table."""
    __tablename__ = "negative_keywords"
    # Auto-incrementing surrogate key.
    keyword_id = Column(Integer, primary_key=True, autoincrement=True)
    # Keyword text; unique and required.
    name = Column(String(SHORT_LEN), unique=True, nullable=False)
class UserNegativeKeyword(Base):
    """Association row linking a user to one of their negative keywords."""
    __tablename__ = "user_negative_keywords"
    # Composite primary key (user_id, keyword_id); both foreign keys are
    # declared with ON DELETE CASCADE.
    user_id = Column(Integer, ForeignKey(
        "users.user_id", ondelete="CASCADE"), primary_key=True)
    keyword_id = Column(Integer, ForeignKey(
        "negative_keywords.keyword_id", ondelete="CASCADE"), primary_key=True)
class Log(Base):
__tablename__ = "logs"
id = Column(Integer, primary_key=True, autoincrement=True)
@@ -155,6 +439,35 @@ class Log(Base):
fetched_at = Column(DateTime)
class EmailSubscription(Base):
    """An email address subscribed to job alerts (``email_subscriptions`` table).

    Timestamps are produced with ``datetime.now(UTC)``, the same clock that
    ``EmailTemplate`` and the raw-SQL subscribe/unsubscribe helpers use for
    this table, instead of the deprecated, naive ``datetime.utcnow``.
    """
    __tablename__ = "email_subscriptions"
    # Auto-incrementing surrogate key.
    subscription_id = Column(Integer, primary_key=True, autoincrement=True)
    # Subscriber address; unique and required.
    email = Column(String(SHORT_LEN), unique=True, nullable=False)
    # False once the subscription has been deactivated.
    is_active = Column(Boolean, default=True, nullable=False)
    # Callables are used so the timestamp is evaluated on each insert,
    # not once at import time.
    created_at = Column(
        DateTime, default=lambda: datetime.now(UTC), nullable=False)
    updated_at = Column(
        DateTime, default=lambda: datetime.now(UTC), nullable=False)

    def touch(self):
        """Set ``updated_at`` to the current UTC time."""
        setattr(self, "updated_at", datetime.now(UTC))
class EmailTemplate(Base):
    """A stored email template (``email_templates`` table)."""
    __tablename__ = "email_templates"
    # Auto-incrementing surrogate key.
    template_id = Column(Integer, primary_key=True, autoincrement=True)
    # Unique identifier used to look templates up by slug.
    slug = Column(String(SHORT_LEN), unique=True, nullable=False)
    # Display name.
    name = Column(String(SHORT_LEN), nullable=False)
    # Subject and body text of the template.
    subject = Column(Text, nullable=False)
    body = Column(Text, nullable=False)
    is_active = Column(Boolean, default=True, nullable=False)
    # Defaults are callables so the timestamp is evaluated per insert.
    created_at = Column(
        DateTime, default=lambda: datetime.now(UTC), nullable=False)
    updated_at = Column(
        DateTime, default=lambda: datetime.now(UTC), nullable=False)

    def touch(self):
        """Set ``updated_at`` to the current UTC time."""
        setattr(self, "updated_at", datetime.now(UTC))
def _ensure_session() -> Session:
global engine, SessionLocal
if engine is None or SessionLocal is None:
@@ -202,6 +515,31 @@ def db_init():
text("ALTER TABLE users ADD COLUMN IF NOT EXISTS last_login DATETIME NULL"))
except Exception:
pass
try:
conn.execute(text(
"ALTER TABLE job_descriptions ADD COLUMN IF NOT EXISTS reply_url VARCHAR(512) NULL"))
except Exception:
pass
try:
conn.execute(text(
"ALTER TABLE job_descriptions ADD COLUMN IF NOT EXISTS contact_email VARCHAR(255) NULL"))
except Exception:
pass
try:
conn.execute(text(
"ALTER TABLE job_descriptions ADD COLUMN IF NOT EXISTS contact_phone VARCHAR(255) NULL"))
except Exception:
pass
try:
conn.execute(text(
"ALTER TABLE job_descriptions ADD COLUMN IF NOT EXISTS contact_name VARCHAR(255) NULL"))
except Exception:
pass
try:
ensure_default_email_template()
except Exception:
pass
def upsert_user_interaction(job_id: str | int, *, user_id: Optional[int] = None, seen_at: Optional[str] = None, url_visited: Optional[str] = None, is_user_favorite: Optional[bool] = None):
@@ -279,6 +617,9 @@ def upsert_job_details(job_data: Dict[str, Any], region: str = "", keyword: str
the function will skip updating to avoid unnecessary work.
- On successful upsert, a log entry is recorded with `insert_log(url, ...)`.
"""
if not job_data or job_data.get("is_negative_match"):
return
url = job_data.get("url")
job_id = normalize_job_id(job_data.get("id"), url)
if not job_id:
@@ -303,6 +644,10 @@ def upsert_job_details(job_data: Dict[str, Any], region: str = "", keyword: str
location = job_data.get("location") or None
description = job_data.get("description") or None
posted_time = job_data.get("posted_time") or None
reply_url = job_data.get("reply_url") or None
contact_email = job_data.get("contact_email") or None
contact_phone = job_data.get("contact_phone") or None
contact_name = job_data.get("contact_name") or None
job_id = str(job_id)
with _ensure_session() as session:
@@ -316,6 +661,10 @@ def upsert_job_details(job_data: Dict[str, Any], region: str = "", keyword: str
setattr(obj, "description", description)
setattr(obj, "posted_time", posted_time)
setattr(obj, "url", url)
setattr(obj, "reply_url", reply_url)
setattr(obj, "contact_email", contact_email)
setattr(obj, "contact_phone", contact_phone)
setattr(obj, "contact_name", contact_name)
session.commit()
# Record that we fetched/updated this job page
try:
@@ -627,6 +976,27 @@ def upsert_keyword(name: str) -> int:
return upsert_keyword(name)
def upsert_negative_keyword(name: str) -> int:
    """Return the ``keyword_id`` for a negative keyword, creating it if needed.

    The name is stripped and lower-cased before lookup.

    Raises:
        ValueError: the name is empty after normalization.
    """
    name = (name or "").strip().lower()
    if not name:
        raise ValueError("Negative keyword cannot be empty")

    def _find_id(active_session):
        # Returns the matching row, or None when the keyword is not stored.
        return active_session.execute(
            text("SELECT keyword_id FROM negative_keywords WHERE name = :n"),
            {"n": name},
        ).fetchone()

    with _ensure_session() as session:
        hit = _find_id(session)
        if hit:
            return int(hit[0])
        session.execute(
            text("INSERT INTO negative_keywords(name) VALUES (:n)"), {"n": name})
        session.commit()

    # Read the id back in a fresh session after the insert.
    with _ensure_session() as session:
        hit = _find_id(session)
        if hit:
            return int(hit[0])

    # Still not visible: start over.
    return upsert_negative_keyword(name)
def set_user_regions(username: str, region_names: List[str]) -> None:
"""Replace user's preferred regions with given names."""
user_id = get_or_create_user(username)
@@ -685,6 +1055,34 @@ def set_user_keywords(username: str, keyword_names: List[str]) -> None:
session.commit()
def set_user_negative_keywords(username: str, keyword_names: List[str]) -> None:
    """Replace a user's negative keywords with ``keyword_names``.

    Names are stripped, lower-cased and de-duplicated; blanks are ignored.
    Each remaining name is upserted into the master list, then the user's
    links are adjusted so they match exactly that set. The user is created
    if missing.
    """
    user_id = get_or_create_user(username)
    cleaned = {(raw or "").strip().lower()
               for raw in keyword_names if (raw or "").strip()}
    names = sorted(cleaned)
    keyword_ids: List[int] = [upsert_negative_keyword(entry) for entry in names]

    if not (keyword_ids or names):
        # Nothing selected: drop every link for this user in one statement.
        with _ensure_session() as session:
            session.execute(
                text("DELETE FROM user_negative_keywords WHERE user_id = :u"), {"u": user_id})
            session.commit()
        return

    wanted = set(keyword_ids)
    with _ensure_session() as session:
        linked = session.execute(
            text("SELECT keyword_id FROM user_negative_keywords WHERE user_id = :u"),
            {"u": user_id},
        ).fetchall()
        existing = {int(link[0]) for link in linked}

        # Only touch the rows that actually differ.
        for stale in existing - wanted:
            session.execute(
                text("DELETE FROM user_negative_keywords WHERE user_id = :u AND keyword_id = :k"),
                {"u": user_id, "k": int(stale)},
            )
        for fresh in wanted - existing:
            session.execute(
                text("INSERT INTO user_negative_keywords(user_id, keyword_id) VALUES(:u, :k)"),
                {"u": user_id, "k": int(fresh)},
            )
        session.commit()
def get_user_regions(username: str) -> List[Dict[str, str]]:
"""Return preferred region names for a user (empty if none)."""
with _ensure_session() as session:
@@ -725,6 +1123,26 @@ def get_user_keywords(username: str) -> List[Dict[str, str]]:
return [{"name": r[0], "color": r[1]} for r in rows]
def get_user_negative_keywords(username: str) -> List[str]:
    """Return the user's negative keyword names, sorted ascending.

    Returns an empty list when the username is unknown or has no keywords.
    """
    with _ensure_session() as session:
        owner = session.execute(
            text("SELECT user_id FROM users WHERE username = :u"),
            {"u": username},
        ).fetchone()
        if not owner:
            return []

        keyword_rows = session.execute(text(
            """
            SELECT k.name
            FROM negative_keywords k
            INNER JOIN user_negative_keywords uk ON uk.keyword_id = k.keyword_id
            WHERE uk.user_id = :u
            ORDER BY k.name ASC
            """
        ), {"u": int(owner[0])}).fetchall()
        return [entry[0] for entry in keyword_rows]
def get_all_regions() -> List[Dict[str, str]]:
"""Return all region names from regions table (sorted)."""
with _ensure_session() as session:

130
web/email_service.py Normal file
View File

@@ -0,0 +1,130 @@
"""Email sending utilities for the jobs scraper."""
from __future__ import annotations
from email.message import EmailMessage
from typing import Iterable, Sequence
import smtplib
from web.utils import get_email_settings
class EmailConfigurationError(RuntimeError):
    """Raised when email settings are missing or invalid.

    Within this module it is raised for a missing SMTP host, a missing from
    address, and an empty recipient list.
    """
class EmailDeliveryError(RuntimeError):
    """Raised when an email fails to send.

    ``send_email`` wraps exceptions raised while connecting to or talking
    with the SMTP server in this type and chains the original exception as
    the cause.
    """
def _normalize_addresses(addresses: Sequence[str] | str | None) -> list[str]:
if not addresses:
return []
if isinstance(addresses, str):
items = [addresses]
else:
items = list(addresses)
cleaned: list[str] = []
seen: set[str] = set()
for raw in items:
if not isinstance(raw, str):
continue
addr = raw.strip()
if not addr:
continue
lower = addr.lower()
if lower in seen:
continue
seen.add(lower)
cleaned.append(addr)
return cleaned
def _ensure_recipients(*recipient_groups: Iterable[str]) -> list[str]:
merged: list[str] = []
seen: set[str] = set()
for group in recipient_groups:
for addr in group:
lower = addr.lower()
if lower in seen:
continue
seen.add(lower)
merged.append(addr)
if not merged:
raise EmailConfigurationError(
"At least one recipient address is required")
return merged
def send_email(
    *,
    subject: str,
    body: str,
    to: Sequence[str] | str,
    cc: Sequence[str] | str | None = None,
    bcc: Sequence[str] | str | None = None,
    reply_to: Sequence[str] | str | None = None,
    settings: dict | None = None,
) -> bool:
    """Send a plain-text email through the configured SMTP server.

    Args:
        subject: Value for the ``Subject`` header.
        body: Plain-text message content.
        to: Primary recipient(s); a single address or a sequence of them.
        cc: Optional carbon-copy recipient(s), written to the ``Cc`` header.
        bcc: Optional blind-copy recipient(s); they are delivered to but are
            not written to any header.
        reply_to: Optional address(es) for the ``Reply-To`` header.
        settings: Email settings to use instead of ``get_email_settings()``.
            A falsy value (``None`` or an empty dict) falls back to the
            configured settings.

    Returns:
        True when a message is sent, False when email is disabled.

    Raises:
        EmailConfigurationError: if the SMTP host, the from address or the
            recipient list is missing, or the port/timeout is not an integer.
        EmailDeliveryError: for any failure while connecting to or talking
            with the SMTP server.
    """
    config = settings or get_email_settings()
    if not config.get("enabled"):
        return False

    # `or {}` also covers an explicit `"smtp": None` entry.
    smtp_cfg = config.get("smtp") or {}
    host = (smtp_cfg.get("host") or "").strip()
    if not host:
        raise EmailConfigurationError("SMTP host is not configured")

    # A non-numeric port/timeout is a configuration problem; report it with
    # the documented exception type instead of leaking a bare ValueError.
    try:
        port = int(smtp_cfg.get("port", 587) or 587)
        timeout = int(smtp_cfg.get("timeout", 30) or 30)
    except (TypeError, ValueError) as exc:
        raise EmailConfigurationError(
            "SMTP port and timeout must be integers") from exc
    use_ssl = bool(smtp_cfg.get("use_ssl", False))
    use_tls = bool(smtp_cfg.get("use_tls", True))

    # Fall back to the SMTP username when no explicit sender is configured.
    from_address = (config.get("from_address")
                    or smtp_cfg.get("username") or "").strip()
    if not from_address:
        raise EmailConfigurationError("From address is not configured")

    to_list = _normalize_addresses(to)
    cc_list = _normalize_addresses(cc)
    bcc_list = _normalize_addresses(bcc)
    reply_to_list = _normalize_addresses(reply_to)
    # Raises EmailConfigurationError when every group is empty.
    all_recipients = _ensure_recipients(to_list, cc_list, bcc_list)

    message = EmailMessage()
    message["Subject"] = subject
    message["From"] = from_address
    message["To"] = ", ".join(to_list)
    if cc_list:
        message["Cc"] = ", ".join(cc_list)
    if reply_to_list:
        message["Reply-To"] = ", ".join(reply_to_list)
    # Bcc recipients are intentionally left out of the headers; they are
    # only passed in the envelope recipient list below.
    message.set_content(body)

    username = (smtp_cfg.get("username") or "").strip()
    password = smtp_cfg.get("password") or ""

    client_cls = smtplib.SMTP_SSL if use_ssl else smtplib.SMTP
    try:
        with client_cls(host=host, port=port, timeout=timeout) as client:
            client.ehlo()
            # STARTTLS only applies to a plain connection; an implicit-SSL
            # connection is already encrypted.
            if use_tls and not use_ssl:
                client.starttls()
                client.ehlo()
            if username:
                client.login(username, password)
            client.send_message(message, from_addr=from_address,
                                to_addrs=all_recipients)
    except Exception as exc:  # pragma: no cover - network errors depend on env
        raise EmailDeliveryError(str(exc)) from exc
    return True

106
web/email_templates.py Normal file
View File

@@ -0,0 +1,106 @@
"""Email templates for job notifications."""
from __future__ import annotations
from datetime import datetime, UTC
from typing import Iterable, Mapping, Dict, Any
# strftime pattern used to render the {timestamp} placeholder.
DEFAULT_DATETIME_FORMAT = "%Y-%m-%d %H:%M"
# Fallback subject template, used when no template (or an empty subject) is
# available.
DEFAULT_JOB_ALERT_SUBJECT = "{count_label}{scope}"
# Fallback body template, used when no template (or an empty body) is
# available. {jobs_section} already starts with a newline, so it directly
# follows {intro_line}.
DEFAULT_JOB_ALERT_BODY = (
    "Hi,\n\n{intro_line}{jobs_section}\n\nGenerated at {timestamp} UTC.\n"
    "You are receiving this message because job alerts are enabled.\n"
)
class _SafeDict(dict):
    """Dict that yields an empty string for missing keys.

    Used with ``str.format_map`` so a template that references an unknown
    placeholder renders it as nothing instead of raising ``KeyError``.
    """

    def __missing__(self, key: str) -> str:
        # Called by dict lookup when ``key`` is absent.
        return ""
def _format_template(template: str, context: Dict[str, Any]) -> str:
    """Fill ``template`` placeholders from ``context``.

    List values are rendered as their items joined by newlines; every other
    value is passed to the formatter unchanged. Placeholders that are not in
    ``context`` render as an empty string.
    """
    values = _SafeDict()
    for name, value in context.items():
        if isinstance(value, list):
            value = "\n".join(str(entry) for entry in value)
        values[name] = value
    return template.format_map(values)
def render_job_alert_email(
    jobs: Iterable[Mapping[str, object]],
    *,
    region: str | None = None,
    keyword: str | None = None,
    generated_at: datetime | None = None,
    template_override: Mapping[str, str] | None = None,
) -> dict[str, Any]:
    """Render the subject/body for a job alert email.

    Args:
        jobs: Job mappings; ``title``, ``company``, ``location`` and ``url``
            are read from each one.
        region: Optional region shown in the scope suffix.
        keyword: Optional keyword shown in the scope suffix.
        generated_at: Timestamp to report; defaults to the current UTC time.
        template_override: Mapping with ``subject``/``body`` (and optionally
            ``slug``) to use instead of the stored template. When ``None``,
            the ``job-alert`` template is looked up through ``web.db``; if
            that lookup fails for any reason the built-in defaults are used.

    Returns:
        A dict with the rendered ``subject`` and ``body``, the ``context``
        used to render them, and the ``template_slug``.
    """
    job_list = list(jobs)
    generated_at = generated_at or datetime.now(UTC)
    timestamp = generated_at.strftime(DEFAULT_DATETIME_FORMAT)

    scope_parts = []
    if region:
        scope_parts.append(f"region: {region}")
    if keyword:
        scope_parts.append(f"keyword: {keyword}")
    scope = " (" + ", ".join(scope_parts) + ")" if scope_parts else ""

    job_lines: list[str] = []
    for index, job in enumerate(job_list, start=1):
        # `or` rather than a .get() default, so keys that are present but
        # None/empty get the placeholder instead of rendering as "None".
        title = str(job.get("title") or "Untitled")
        company = str(job.get("company") or "Unknown company")
        location = str(job.get("location") or "N/A")
        url = str(job.get("url") or "")
        # Separate title and company so they do not run together.
        job_lines.append(f"{index}. {title} - {company} ({location})")
        if url:
            job_lines.append(f" {url}")

    if job_lines:
        jobs_section = "\n" + "\n".join(job_lines)
    else:
        jobs_section = "\nNo jobs matched this alert."
    # Same block without the leading newline, for templates that supply
    # their own spacing.
    jobs_message = jobs_section.strip()

    context: Dict[str, Any] = {
        "count": len(job_list),
        "count_label": "No new jobs" if not job_list else f"{len(job_list)} new jobs",
        "scope": scope,
        "region": region or "",
        "keyword": keyword or "",
        "timestamp": timestamp,
        "generated_at": generated_at,
        "intro_line": "Here are the latest jobs discovered by the scraper:",
        "jobs_message": jobs_message,
        "jobs_section": jobs_section,
        "jobs_lines": job_lines,
        "has_jobs": bool(job_list),
    }

    template = template_override
    if template is None:
        # Best effort: rendering must still work when the database (or the
        # stored template) is unavailable, so any failure means "defaults".
        try:
            from web.db import get_email_template_by_slug
            template = get_email_template_by_slug("job-alert")
        except Exception:
            template = None

    # Empty subject/body values fall back to the defaults as well.
    template_subject = (template or {}).get(
        "subject") or DEFAULT_JOB_ALERT_SUBJECT
    template_body = (template or {}).get("body") or DEFAULT_JOB_ALERT_BODY

    return {
        "subject": _format_template(template_subject, context),
        "body": _format_template(template_body, context),
        "context": context,
        "template_slug": (template or {}).get("slug", "job-alert"),
    }

View File

@@ -1,7 +1,82 @@
from datetime import datetime, UTC
from bs4 import BeautifulSoup
from typing import List, Dict, Set
from web.utils import get_base_url, safe_get_text, safe_get_attr, make_request_with_retry
from urllib.parse import urlparse, parse_qs
import re
from web.utils import (
get_base_url,
safe_get_text,
safe_get_attr,
make_request_with_retry,
get_negative_keywords,
)
def extract_contact_info(reply_url) -> Dict[str, str]:
    """Extract contact information from a reply URL.

    Handles three shapes of input:

    - ``mailto:`` links: the address before any ``?subject=...`` part
    - ``tel:`` links: the phone number
    - ``http``/``https`` URLs: well-known query parameters for email, phone
      and contact name

    Args:
        reply_url: The reply link; may be ``None``, empty, ``"N/A"`` or any
            object whose ``str()`` is a URL.

    Returns:
        A dict with ``email``, ``phone`` and ``contact_name`` keys. A value
        that cannot be determined stays ``"N/A"``.
    """
    contact_info = {
        "email": "N/A",
        "phone": "N/A",
        "contact_name": "N/A",
    }

    # Check before and after str()/strip(): the raw value may be None or a
    # whitespace-padded placeholder.
    if not reply_url or reply_url == "N/A":
        return contact_info
    reply_url = str(reply_url).strip()
    if not reply_url or reply_url == "N/A":
        return contact_info

    # URL schemes are case-insensitive, so match on a lowered copy while
    # slicing the value out of the original string.
    lowered = reply_url.lower()

    if lowered.startswith("mailto:"):
        # Drop only the scheme prefix and any "?subject=..." suffix.
        email = reply_url[len("mailto:"):].split("?", 1)[0].strip()
        if email:  # keep "N/A" rather than storing an empty address
            contact_info["email"] = email
        return contact_info

    if lowered.startswith("tel:"):
        phone = reply_url[len("tel:"):].strip()
        if phone:  # keep "N/A" rather than storing an empty number
            contact_info["phone"] = phone
        return contact_info

    if lowered.startswith("http"):
        try:
            params = parse_qs(urlparse(reply_url).query)
        except ValueError:
            # Malformed URL (e.g. a broken IPv6 host): nothing to extract.
            return contact_info

        # For each output field, the first matching parameter name wins.
        candidates = {
            "email": ("email", "from_email", "sender_email", "contact_email"),
            "phone": ("phone", "tel", "telephone"),
            "contact_name": ("contact_name", "from_name", "name"),
        }
        for field, keys in candidates.items():
            for key in keys:
                if key in params:
                    contact_info[field] = params[key][0]
                    break

    return contact_info
def scrape_listings_page(listing, region: str, keyword: str, seen_urls: Set[str]) -> List:
@@ -40,6 +115,16 @@ def scrape_job_page(content: str, url: str) -> Dict:
"""Scrape job details from a job listing page."""
soup = BeautifulSoup(content, "html.parser")
# Extract reply button
reply_button = soup.find("button", class_="reply-button")
if reply_button:
reply_url = safe_get_attr(reply_button, "data-href")
else:
reply_url = "N/A"
# Extract contact information from reply URL
contact_info = extract_contact_info(reply_url)
# Extract each field
title = safe_get_text(soup.find("h1", class_="postingtitle"))
company = safe_get_text(soup.find("h2", class_="company-name"))
@@ -80,6 +165,30 @@ def scrape_job_page(content: str, url: str) -> Dict:
job_id = ""
posted_time = ""
# Negative keyword detection
negative_keyword_match = None
negative_match_field = None
negative_keywords = get_negative_keywords()
if negative_keywords:
fields_to_check = {
"title": title or "",
"company": company or "",
"location": location or "",
"description": description or "",
}
for keyword in negative_keywords:
if not keyword:
continue
pattern = re.compile(
r"\b" + re.escape(keyword) + r"\b", re.IGNORECASE)
for field_name, field_value in fields_to_check.items():
if field_value and pattern.search(field_value):
negative_keyword_match = keyword
negative_match_field = field_name
break
if negative_keyword_match:
break
return {
"url": url,
"title": title,
@@ -87,7 +196,14 @@ def scrape_job_page(content: str, url: str) -> Dict:
"location": location,
"description": description,
"id": job_id,
"posted_time": posted_time
"posted_time": posted_time,
"reply_url": reply_url,
"contact_email": contact_info["email"],
"contact_phone": contact_info["phone"],
"contact_name": contact_info["contact_name"],
"negative_keyword_match": negative_keyword_match,
"negative_match_field": negative_match_field,
"is_negative_match": bool(negative_keyword_match),
}

View File

@@ -1,4 +1,22 @@
/* javascript form handling */
// While typing a new negative keyword, mark the field invalid as soon as its
// trimmed value matches one of the already listed negative keywords.
document.addEventListener("DOMContentLoaded", () => {
  const keywordInput = document.getElementById("new-negative-keyword");
  if (!keywordInput) {
    return;
  }

  keywordInput.addEventListener("input", (event) => {
    const field = event.currentTarget;
    const candidate = field.value.trim();
    const isDuplicate = Array.from(
      document.querySelectorAll('input[name="negative_keyword"]'),
      (checkbox) => checkbox.value
    ).includes(candidate);

    // An empty message clears the custom validity state again.
    field.setCustomValidity(isDuplicate ? "Keyword already exists" : "");
    if (isDuplicate) {
      field.reportValidity();
    }
  });
});
document
.getElementById("user-settings-form")
.addEventListener("submit", function (event) {
@@ -10,11 +28,15 @@ document
// Collect selected regions and keywords
const selectedRegions = [];
const selectedKeywords = [];
const selectedNegativeKeywords = [];
formData.forEach((value, key) => {
if (key === "region") {
selectedRegions.push(value);
} else if (key === "keyword") {
selectedKeywords.push(value);
} else if (key === "negative_keyword") {
selectedNegativeKeywords.push(value);
}
});
@@ -30,10 +52,21 @@ document
selectedKeywords.push(newKeyword);
}
// Add new negative keyword if provided
const newNegativeKeyword = formData.get("new-negative-keyword").trim();
if (newNegativeKeyword) {
if (selectedNegativeKeywords.includes(newNegativeKeyword)) {
alert("Negative keyword already exists!");
return;
}
selectedNegativeKeywords.push(newNegativeKeyword);
}
// Prepare data to send
const dataToSend = {
regions: selectedRegions,
keywords: selectedKeywords,
negative_keywords: selectedNegativeKeywords,
csrf_token: formData.get("csrf_token"),
};

View File

@@ -0,0 +1,62 @@
{% extends 'base.html' %} {% block content %}
<h2>Email Subscriptions</h2>
<section>
<h3>Add Subscription</h3>
<form method="post">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" />
<input type="hidden" name="action" value="subscribe" />
<label for="email">Email address</label>
<input
type="email"
id="email"
name="email"
placeholder="alerts@example.com"
required
/>
<button type="submit">Subscribe</button>
</form>
</section>
<section>
<h3>Current Recipients</h3>
{% if not subscriptions %}
<p>No subscriptions yet. Add one above to start sending alerts.</p>
<p>You can customize alert content from the <a href="{{ url_for('admin_email_templates') }}">Email Templates</a> page.</p>
{% else %}
<p>{{ total_active }} active of {{ total }} total.</p>
<table>
<thead>
<tr>
<th>Email</th>
<th>Status</th>
<th>Created</th>
<th>Updated</th>
<th>Action</th>
</tr>
</thead>
<tbody>
{% for sub in subscriptions %}
<tr>
<td>{{ sub.email }}</td>
<td>{{ 'Active' if sub.is_active else 'Inactive' }}</td>
<td>{{ sub.created_at }}</td>
<td>{{ sub.updated_at }}</td>
<td>
<form method="post" style="display: inline-flex; gap: 0.5rem">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" />
<input type="hidden" name="email" value="{{ sub.email }}" />
{% if sub.is_active %}
<input type="hidden" name="action" value="unsubscribe" />
<button type="submit">Deactivate</button>
{% else %}
<input type="hidden" name="action" value="reactivate" />
<button type="submit">Reactivate</button>
{% endif %}
</form>
</td>
</tr>
{% endfor %}
</tbody>
</table>
{% endif %}
</section>
{% endblock %}

View File

@@ -0,0 +1,102 @@
{% extends 'base.html' %}
{% block content %}
<h2>Email Templates</h2>
<section>
  <h3>Available Templates</h3>
  {% if not templates %}
  <p>No templates found. Create one below to get started.</p>
  {% else %}
  <table>
    <thead>
      <tr>
        <th>Name</th>
        <th>Slug</th>
        <th>Status</th>
        <th>Updated</th>
        <th>Actions</th>
      </tr>
    </thead>
    <tbody>
      {% for template in templates %}
      <tr>
        <td>{{ template.name }}</td>
        <td>{{ template.slug }}</td>
        <td>{{ 'Active' if template.is_active else 'Inactive' }}</td>
        <td>{{ template.updated_at or template.created_at or '' }}</td>
        <td style="display: flex; gap: 0.5rem;">
          <a class="button" href="{{ url_for('admin_email_templates', template_id=template.template_id) }}">Edit</a>
          <a class="button" href="{{ url_for('admin_email_templates', preview_id=template.template_id) }}">Preview</a>
          {# The template name ends up inside a JavaScript string, where HTML
             autoescaping alone is not enough (a name containing an apostrophe
             would break out of the string). tojson produces a proper JS string
             literal; forceescape makes it safe inside the double-quoted
             attribute. #}
          <form method="post" onsubmit="return confirm({{ ('Delete template ' ~ template.name ~ '?')|tojson|forceescape }});">
            <input type="hidden" name="csrf_token" value="{{ csrf_token() }}" />
            <input type="hidden" name="action" value="delete" />
            <input type="hidden" name="template_id" value="{{ template.template_id }}" />
            <button type="submit">Delete</button>
          </form>
        </td>
      </tr>
      {% endfor %}
    </tbody>
  </table>
  {% endif %}
</section>
<section>
<h3>{{ 'Edit Template' if editing else 'Create Template' }}</h3>
<form method="post">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" />
<input type="hidden" name="action" value="{{ 'update' if editing else 'create' }}" />
{% if editing %}
<input type="hidden" name="template_id" value="{{ editing.template_id }}" />
{% endif %}
<div>
<label for="name">Name</label>
<input type="text" id="name" name="name" value="{{ editing.name if editing else '' }}" required />
</div>
<div>
<label for="slug">Slug</label>
<input type="text" id="slug" name="slug" placeholder="job-alert" value="{{ editing.slug if editing else '' }}" />
<small>Leave blank to reuse the name. Slug must be URL friendly (letters, numbers, dashes).</small>
</div>
<div>
<label for="subject">Subject Template</label>
<input type="text" id="subject" name="subject" value="{{ editing.subject if editing else '' }}" required />
</div>
<div>
<label for="body">Body Template</label>
<textarea id="body" name="body" rows="12" required>{{ editing.body if editing else '' }}</textarea>
</div>
<div>
<label>
<input type="checkbox" name="is_active" {% if editing is none or editing.is_active %}checked{% endif %} />
Active
</label>
</div>
<button type="submit">{{ 'Update Template' if editing else 'Create Template' }}</button>
{% if editing %}
<a class="button" href="{{ url_for('admin_email_templates') }}">Cancel</a>
{% endif %}
</form>
<aside>
<h4>Available placeholders</h4>
<ul>
<li><code>{count}</code> - number of jobs in the alert</li>
<li><code>{count_label}</code> - "No new jobs" or "X new jobs"</li>
<li><code>{scope}</code> - formatted region/keyword context</li>
<li><code>{region}</code>, <code>{keyword}</code> - region and keyword of the alert (empty when not set)</li>
<li><code>{timestamp}</code> - formatted timestamp</li>
<li><code>{jobs_section}</code> - newline-prefixed block of job entries</li>
<li><code>{jobs_message}</code> - jobs block without leading newline</li>
</ul>
</aside>
</section>
{% if preview %}
<section>
<h3>Preview: {{ preview_template.name if preview_template else 'Job Alert' }}</h3>
<article>
<h4>Subject</h4>
<pre>{{ preview.subject }}</pre>
<h4>Body</h4>
<pre>{{ preview.body }}</pre>
</article>
</section>
{% endif %}
{% endblock %}

View File

@@ -16,17 +16,21 @@
<header>
<h1><a href="/">{{ title or 'Admin' }}</a></h1>
<nav>
{% if username %}<span>Hi, {{ username }}</span> | {% endif %}
<a href="{{ url_for('index') }}">Home</a> |
<a href="{{ url_for('user_settings') }}">Preferences</a>
{% if current_user and current_user.is_admin %} |
<a href="{{ url_for('scrape_page') }}">Scrape Jobs</a> |
<a href="{{ url_for('admin_taxonomy') }}">Taxonomy</a> |
<a href="{{ url_for('admin_stats') }}">Statistics</a> |
<a href="{{ url_for('admin_users') }}">Users</a> {% endif %} {% if
session.get('username') %} |
<a href="{{ url_for('logout') }}">Logout</a> {% else %} |
<a href="{{ url_for('login') }}">Login</a>{% endif %}
<div id="navigation">
{% if username %}<span>Hi, {{ username }}</span> | {% endif %}
<a href="{{ url_for('index') }}">Home</a> |
<a href="{{ url_for('user_settings') }}">Preferences</a>
{% if current_user and current_user.is_admin %} |
<a href="{{ url_for('scrape_page') }}">Scrape Jobs</a> |
<a href="{{ url_for('admin_taxonomy') }}">Taxonomy</a> |
<a href="{{ url_for('admin_stats') }}">Statistics</a> |
<a href="{{ url_for('admin_emails') }}">Email Alerts</a> |
<a href="{{ url_for('admin_email_templates') }}">Email Templates</a> |
<a href="{{ url_for('admin_users') }}">Users</a> {% endif %} {% if
session.get('username') %} |
<a href="{{ url_for('logout') }}">Logout</a> {% else %} |
<a href="{{ url_for('login') }}">Login</a>{% endif %}
</div>
</nav>
{% with messages = get_flashed_messages() %} {% if messages %}
<ul>

View File

@@ -77,6 +77,29 @@ block content %}
<p>No keywords available. Ask an admin to add some.</p>
{% endif %}
</fieldset>
<fieldset>
<legend>Negative Keywords</legend>
<p>
<small>Add new Negative Keyword:</small>
<input
type="text"
name="new-negative-keyword"
id="new-negative-keyword"
value=""
placeholder="Type a keyword and save to add"
size="30"
/>
</p>
{% if user_negative_keywords %} {% for nk in user_negative_keywords %}
<label style="display: block">
<input type="checkbox" name="negative_keyword" value="{{ nk }}" checked />
{{ nk }}
</label>
{% endfor %} {% else %}
<p>No negative keywords set.</p>
{% endif %}
<p><small>Uncheck to remove.</small></p>
</fieldset>
<button type="submit">Save</button>
</form>
{% endblock %} {% block footer_scripts %}

View File

@@ -125,6 +125,66 @@ def get_base_url() -> str:
return get_config().get('scraper', {}).get('base_url', "https://{region}.craigslist.org/search/jjj?query={keyword}&sort=rel")
def get_negative_keywords() -> List[str]:
    """Read the scraper's negative keywords from the config.

    Returns:
        The string entries of ``scraper.negative_keywords``, stripped and
        lowercased, in config order. Non-string and blank entries are
        skipped; a non-list setting yields an empty list.
    """
    configured = get_config().get('scraper', {}).get('negative_keywords', [])
    if not isinstance(configured, list):
        return []
    return [
        entry.strip().lower()
        for entry in configured
        if isinstance(entry, str) and entry.strip()
    ]
def get_email_settings() -> Dict[str, Any]:
    """Build the normalized email settings from the ``email`` config section.

    Returns:
        A dict with ``enabled``, ``from_address``, ``smtp`` and
        ``recipients``. Missing or malformed sections fall back to defaults:
        email disabled, port 587, timeout 30, TLS on, SSL off, no recipients.
    """
    def _positive_int(value, fallback):
        # Unparseable values and values <= 0 both yield the fallback.
        try:
            number = int(value)
        except (TypeError, ValueError):
            return fallback
        return number if number > 0 else fallback

    email_cfg = get_config().get('email', {})
    if not isinstance(email_cfg, dict):
        email_cfg = {}

    smtp_section = email_cfg.get('smtp')
    if not isinstance(smtp_section, dict):
        smtp_section = {}

    configured_recipients = email_cfg.get('recipients', [])
    recipients: List[str] = []
    if isinstance(configured_recipients, list):
        # Keep only non-blank string entries, stripped of whitespace.
        recipients = [
            entry.strip()
            for entry in configured_recipients
            if isinstance(entry, str) and entry.strip()
        ]

    smtp = {
        'host': (smtp_section.get('host') or '').strip(),
        'port': _positive_int(smtp_section.get('port', 587), 587),
        'username': (smtp_section.get('username') or '').strip(),
        'password': smtp_section.get('password') or '',
        'use_tls': bool(smtp_section.get('use_tls', True)),
        'use_ssl': bool(smtp_section.get('use_ssl', False)),
        'timeout': _positive_int(smtp_section.get('timeout', 30), 30),
    }

    return {
        'enabled': bool(email_cfg.get('enabled', False)),
        'from_address': (email_cfg.get('from_address') or '').strip(),
        'smtp': smtp,
        'recipients': recipients,
    }
def now_iso() -> str:
"""Get the current time in ISO format."""
return datetime.now(UTC).isoformat()
@@ -203,13 +263,39 @@ def filter_jobs(
jobs: List[Dict[str, Any]],
region: Optional[str] = None,
keyword: Optional[str] = None,
negative_keywords: Optional[List[str]] = None,
) -> List[Dict[str, Any]]:
"""Filter jobs by optional region and keyword."""
"""Filter jobs by optional region, keyword, and negative keywords."""
filtered = jobs
if region:
filtered = [j for j in filtered if j.get("region") == region]
if keyword:
filtered = [j for j in filtered if j.get("keyword") == keyword]
if negative_keywords:
# Matching here is a plain, case-insensitive substring check.
# NOTE: the scraper's negative keyword detection matches whole words (regex with \b), so the two can disagree.
# Fields to check: title, company, location, description
# Note: description might contain HTML or be long.
# Normalize negative keywords
nks = [nk.lower() for nk in negative_keywords if nk]
def is_clean(job):
# Check all fields
text_blob = " ".join([
str(job.get("title") or ""),
str(job.get("company") or ""),
str(job.get("location") or ""),
str(job.get("description") or "")
]).lower()
for nk in nks:
if nk in text_blob:
return False
return True
filtered = [j for j in filtered if is_clean(j)]
return filtered