feat: Implement email sending utilities and templates for job notifications

- Added email_service.py for sending emails with SMTP configuration.
- Introduced email_templates.py to render job alert email subjects and bodies.
- Enhanced scraper.py to extract contact information from job listings.
- Updated settings.js to handle negative keyword input validation.
- Created email.html and email_templates.html for managing email subscriptions and templates in the admin interface.
- Modified base.html to include links for email alerts and templates.
- Expanded user settings.html to allow management of negative keywords.
- Updated utils.py to include functions for retrieving negative keywords and email settings.
- Enhanced job filtering logic to exclude jobs containing negative keywords.
2025-11-28 18:15:08 +01:00
parent 8afb208985
commit 2185a07ff0
23 changed files with 2660 additions and 63 deletions
--- a/tests/test_admin_email.py
+++ b/tests/test_admin_email.py
@@ -0,0 +1,84 @@
+import pytest
+from sqlalchemy import text
+
+from web.app import app
+from web.db import (
+    db_init,
+    create_or_update_user,
+    subscribe_email,
+    list_email_subscriptions,
+    _ensure_session,
+)
+
+
+@pytest.fixture(scope="function", autouse=True)
+def initialize_app():
+    app.config.update(TESTING=True, WTF_CSRF_ENABLED=False)
+    with app.app_context():
+        db_init()
+        create_or_update_user("admin", password="secret",
+                              is_admin=True, is_active=True)
+    # Clear subscriptions before and after each test to avoid leakage
+    with _ensure_session() as session:
+        session.execute(text("DELETE FROM email_subscriptions"))
+        session.commit()
+    yield
+    with _ensure_session() as session:
+        session.execute(text("DELETE FROM email_subscriptions"))
+        session.commit()
+
+
+@pytest.fixture
+def client():
+    with app.test_client() as test_client:
+        with test_client.session_transaction() as sess:
+            sess["username"] = "admin"
+        yield test_client
+
+
+@pytest.fixture
+def anon_client():
+    with app.test_client() as test_client:
+        # Ensure no admin session present
+        with test_client.session_transaction() as sess:
+            sess.pop("username", None)
+        yield test_client
+
+
+def test_admin_emails_requires_admin(anon_client):
+    response = anon_client.get("/admin/emails")
+    assert response.status_code == 302
+    assert "/login" in response.headers.get("Location", "")
+
+
+def test_admin_emails_lists_subscriptions(client):
+    subscribe_email("alice@example.com")
+    response = client.get("/admin/emails")
+    assert response.status_code == 200
+    assert b"alice@example.com" in response.data
+
+
+def test_admin_emails_can_subscribe(client):
+    response = client.post(
+        "/admin/emails",
+        data={"action": "subscribe", "email": "bob@example.com"},
+        follow_redirects=False,
+    )
+    assert response.status_code == 302
+    emails = list_email_subscriptions()
+    assert any(sub["email"] == "bob@example.com" and sub["is_active"]
+               for sub in emails)
+
+
+def test_admin_emails_can_unsubscribe(client):
+    subscribe_email("carol@example.com")
+    response = client.post(
+        "/admin/emails",
+        data={"action": "unsubscribe", "email": "carol@example.com"},
+        follow_redirects=False,
+    )
+    assert response.status_code == 302
+    emails = list_email_subscriptions()
+    matching = [sub for sub in emails if sub["email"] == "carol@example.com"]
+    assert matching
+    assert matching[0]["is_active"] is False
--- a/tests/test_admin_email_templates.py
+++ b/tests/test_admin_email_templates.py
@@ -0,0 +1,138 @@
+import pytest
+from sqlalchemy import text
+
+from web.app import app
+from web.db import (
+    db_init,
+    create_or_update_user,
+    list_email_templates,
+    update_email_template,
+    _ensure_session,
+    ensure_default_email_template,
+)
+from web.email_templates import render_job_alert_email
+
+
+@pytest.fixture(scope="function", autouse=True)
+def setup_database():
+    app.config.update(TESTING=True, WTF_CSRF_ENABLED=False)
+    with app.app_context():
+        db_init()
+        create_or_update_user("admin", password="secret", is_admin=True, is_active=True)
+    with _ensure_session() as session:
+        session.execute(text("DELETE FROM email_templates"))
+        session.commit()
+    ensure_default_email_template()
+    yield
+    with _ensure_session() as session:
+        session.execute(text("DELETE FROM email_templates"))
+        session.commit()
+    ensure_default_email_template()
+
+
+@pytest.fixture
+def client():
+    with app.test_client() as test_client:
+        with test_client.session_transaction() as sess:
+            sess["username"] = "admin"
+        yield test_client
+
+
+@pytest.fixture
+def anon_client():
+    with app.test_client() as test_client:
+        with test_client.session_transaction() as sess:
+            sess.pop("username", None)
+        yield test_client
+
+
+def test_email_templates_requires_admin(anon_client):
+    response = anon_client.get("/admin/email-templates")
+    assert response.status_code == 302
+    assert "/login" in response.headers.get("Location", "")
+
+
+def test_email_templates_lists_default(client):
+    response = client.get("/admin/email-templates")
+    assert response.status_code == 200
+    assert b"job-alert" in response.data
+
+
+def test_email_templates_create_update_delete(client):
+    # Create
+    response = client.post(
+        "/admin/email-templates",
+        data={
+            "action": "create",
+            "name": "Daily Summary",
+            "slug": "daily-summary",
+            "subject": "Summary: {count_label}",
+            "body": "Jobs:{jobs_section}",
+            "is_active": "on",
+        },
+        follow_redirects=False,
+    )
+    assert response.status_code == 302
+    templates = list_email_templates()
+    assert any(t["slug"] == "daily-summary" for t in templates)
+
+    # Update
+    template_row = next(t for t in templates if t["slug"] == "daily-summary")
+    response = client.post(
+        "/admin/email-templates",
+        data={
+            "action": "update",
+            "template_id": template_row["template_id"],
+            "name": "Daily Summary",
+            "slug": "daily-summary",
+            "subject": "Updated: {count_label}",
+            "body": "Updated body {jobs_section}",
+        },
+        follow_redirects=False,
+    )
+    assert response.status_code == 302
+    updated = list_email_templates()
+    updated_row = next(t for t in updated if t["slug"] == "daily-summary")
+    assert "Updated:" in updated_row["subject"]
+
+    # Delete
+    response = client.post(
+        "/admin/email-templates",
+        data={
+            "action": "delete",
+            "template_id": updated_row["template_id"],
+        },
+        follow_redirects=False,
+    )
+    assert response.status_code == 302
+    slugs = [t["slug"] for t in list_email_templates()]
+    assert "daily-summary" not in slugs
+
+
+def test_email_templates_preview(client):
+    templates = list_email_templates()
+    job_alert = next(t for t in templates if t["slug"] == "job-alert")
+    response = client.get(f"/admin/email-templates?preview_id={job_alert['template_id']}")
+    assert response.status_code == 200
+    assert b"Preview" in response.data
+    assert b"Subject" in response.data
+
+
+def test_render_job_alert_email_uses_template_override(client):
+    templates = list_email_templates()
+    job_alert = next(t for t in templates if t["slug"] == "job-alert")
+    update_email_template(
+        job_alert["template_id"],
+        subject="Custom Subject {count}",
+        body="Body {jobs_message}",
+    )
+    rendered = render_job_alert_email([
+        {
+            "title": "Python Developer",
+            "company": "Acme",
+            "location": "Remote",
+            "url": "https://example.com",
+        }
+    ])
+    assert rendered["subject"].startswith("Custom Subject")
+    assert "Python Developer" in rendered["body"]
--- a/tests/test_db_negative_filtering.py
+++ b/tests/test_db_negative_filtering.py
@@ -0,0 +1,21 @@
+import pytest
+import web.db as db
+
+
+def test_upsert_job_details_skips_negative_match(monkeypatch):
+    def fail(*args, **kwargs):  # pragma: no cover - guard against unwanted calls
+        raise AssertionError("should not reach database layers when negative")
+
+    monkeypatch.setattr(db, "_ensure_session", fail)
+    monkeypatch.setattr(db, "insert_log", fail)
+
+    job_data = {
+        "url": "https://example.com/job/neg",
+        "id": "neg123",
+        "is_negative_match": True,
+        "negative_keyword_match": "scam",
+        "negative_match_field": "title",
+    }
+
+    # Should return early without touching the database helpers.
+    db.upsert_job_details(job_data)
--- a/tests/test_email_service.py
+++ b/tests/test_email_service.py
@@ -0,0 +1,106 @@
+import pytest
+
+from web.email_service import (
+    EmailConfigurationError,
+    send_email,
+)
+
+
+def test_send_email_disabled(monkeypatch):
+    called = {}
+
+    def _fake_smtp(*args, **kwargs):  # pragma: no cover - should not be called
+        called["used"] = True
+        raise AssertionError(
+            "SMTP should not be invoked when email is disabled")
+
+    monkeypatch.setattr("web.email_service.smtplib.SMTP", _fake_smtp)
+    monkeypatch.setattr("web.email_service.smtplib.SMTP_SSL", _fake_smtp)
+
+    result = send_email(
+        subject="Hi",
+        body="Test",
+        to="user@example.com",
+        settings={"enabled": False},
+    )
+    assert result is False
+    assert called == {}
+
+
+def test_send_email_sends_message(monkeypatch):
+    events = {"starttls": False, "login": None, "sent": None}
+
+    class FakeSMTP:
+        def __init__(self, *, host, port, timeout):
+            self.host = host
+            self.port = port
+            self.timeout = timeout
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, exc_type, exc, tb):
+            return False
+
+        def ehlo(self):
+            events.setdefault("ehlo", 0)
+            events["ehlo"] += 1
+
+        def starttls(self):
+            events["starttls"] = True
+
+        def login(self, username, password):
+            events["login"] = (username, password)
+
+        def send_message(self, message, *, from_addr, to_addrs):
+            events["sent"] = {
+                "from": from_addr,
+                "to": tuple(to_addrs),
+                "subject": message["Subject"],
+            }
+
+    monkeypatch.setattr("web.email_service.smtplib.SMTP", FakeSMTP)
+    monkeypatch.setattr("web.email_service.smtplib.SMTP_SSL", FakeSMTP)
+
+    settings = {
+        "enabled": True,
+        "from_address": "jobs@example.com",
+        "smtp": {
+            "host": "smtp.example.com",
+            "port": 2525,
+            "timeout": 15,
+            "username": "jobs",
+            "password": "secret",
+            "use_tls": True,
+            "use_ssl": False,
+        },
+    }
+
+    result = send_email(
+        subject="New Jobs",
+        body="You have new jobs waiting.",
+        to=["a@example.com", "b@example.com"],
+        cc="c@example.com",
+        bcc=["d@example.com"],
+        settings=settings,
+    )
+
+    assert result is True
+    assert events["starttls"] is True
+    assert events["login"] == ("jobs", "secret")
+    assert events["sent"] == {
+        "from": "jobs@example.com",
+        "to": ("a@example.com", "b@example.com", "c@example.com", "d@example.com"),
+        "subject": "New Jobs",
+    }
+
+
+def test_send_email_requires_host():
+    settings = {
+        "enabled": True,
+        "from_address": "jobs@example.com",
+        "smtp": {"host": "", "port": 587},
+    }
+    with pytest.raises(EmailConfigurationError):
+        send_email(subject="Hi", body="Test",
+                   to="user@example.com", settings=settings)
--- a/tests/test_email_templates.py
+++ b/tests/test_email_templates.py
@@ -0,0 +1,40 @@
+from datetime import datetime
+
+from web.email_templates import render_job_alert_email
+
+
+def test_render_job_alert_email_with_jobs():
+    jobs = [
+        {
+            "title": "Python Developer",
+            "company": "Acme",
+            "location": "Remote",
+            "url": "https://example.com/jobs/1",
+        },
+        {
+            "title": "Data Engineer",
+            "company": "Globex",
+            "location": "NYC",
+            "url": "https://example.com/jobs/2",
+        },
+    ]
+    ts = datetime(2025, 11, 3, 12, 0)
+    rendered = render_job_alert_email(
+        jobs, region="sfbay", keyword="python", generated_at=ts)
+
+    assert rendered["subject"] == "2 new jobs (region: sfbay, keyword: python)"
+    assert "1. Python Developer" in rendered["body"]
+    assert "Generated at 2025-11-03 12:00 UTC." in rendered["body"]
+    assert rendered["context"]["count"] == 2
+    assert rendered["context"]["jobs_section"].startswith(
+        "\n1. Python Developer")
+
+
+def test_render_job_alert_email_empty():
+    ts = datetime(2025, 11, 3, 12, 0)
+    rendered = render_job_alert_email([], generated_at=ts)
+
+    assert rendered["subject"] == "No new jobs"
+    assert "No jobs matched this alert." in rendered["body"]
+    assert rendered["body"].count("Generated at") == 1
+    assert rendered["context"]["count"] == 0
--- a/tests/test_scheduler.py
+++ b/tests/test_scheduler.py
@@ -1,7 +1,7 @@
 import pytest
 import time
 from unittest.mock import patch, MagicMock
-from web.craigslist import scrape_jobs_with_retry, run_scheduled_scraping
+from web.craigslist import scrape_jobs_with_retry, run_scheduled_scraping, fetch_listings


 class TestScheduler:
@@ -38,3 +38,100 @@ class TestScheduler:
        # This is a basic test to ensure the scheduler can be set up
        from web.craigslist import schedule
        assert schedule is not None
+
+    @patch('web.craigslist.db_get_all_job_urls')
+    @patch('web.craigslist.seed_regions_keywords_from_listings')
+    @patch('web.craigslist.get_all_regions')
+    @patch('web.craigslist.get_all_keywords')
+    @patch('web.craigslist.get_last_fetch_time')
+    @patch('web.craigslist.process_region_keyword')
+    @patch('web.craigslist.upsert_listing')
+    @patch('web.craigslist.insert_log')
+    def test_fetch_listings_return_structure(self, mock_log, mock_upsert, mock_process, mock_last_fetch,
+                                             mock_keywords, mock_regions, mock_seed, mock_db_urls):
+        """Test that fetch_listings returns the correct structure with per-search counts."""
+        # Setup mocks
+        mock_db_urls.return_value = []
+        mock_regions.return_value = [{"name": "sfbay"}]
+        mock_keywords.return_value = [{"name": "python"}]
+        mock_last_fetch.return_value = None  # Never fetched before
+        mock_process.return_value = [
+            ("2025-11-03T10:00:00Z", "sfbay", "python", "Python Dev",
+             "$100k", "San Francisco", "http://example.com/1"),
+            ("2025-11-03T10:00:00Z", "sfbay", "python", "Python Dev",
+             "$100k", "San Francisco", "http://example.com/2"),
+        ]
+
+        # Drain the generator; its return value arrives on StopIteration.value
+        gen = fetch_listings()
+        result = None
+        try:
+            while True:
+                # Yielded progress messages are not asserted on; discard them.
+                next(gen)
+        except StopIteration as e:
+            result = e.value
+
+        # Verify return structure
+        assert result is not None
+        assert "discovered" in result
+        assert "new" in result
+        assert "by_search" in result
+        assert isinstance(result.get("by_search"), list)
+        assert result.get("discovered") == 2
+        assert result.get("new") == 2
+
+    @patch('web.craigslist.db_get_all_job_urls')
+    @patch('web.craigslist.seed_regions_keywords_from_listings')
+    @patch('web.craigslist.get_all_regions')
+    @patch('web.craigslist.get_all_keywords')
+    @patch('web.craigslist.get_last_fetch_time')
+    @patch('web.craigslist.process_region_keyword')
+    @patch('web.craigslist.upsert_listing')
+    @patch('web.craigslist.insert_log')
+    def test_fetch_listings_per_search_count(self, mock_log, mock_upsert, mock_process, mock_last_fetch,
+                                             mock_keywords, mock_regions, mock_seed, mock_db_urls):
+        """Test that fetch_listings correctly counts jobs per search."""
+        # Setup mocks
+        mock_db_urls.return_value = []
+        mock_regions.return_value = [{"name": "sfbay"}, {"name": "losangeles"}]
+        mock_keywords.return_value = [{"name": "python"}, {"name": "java"}]
+        mock_last_fetch.return_value = None  # Never fetched before
+
+        # Mock process_region_keyword to return different counts for each search
+        def mock_process_impl(region, keyword, discovered_urls):
+            # Use unique URLs per search to get the total discovered count
+            base_url = f"http://example.com/{region}/{keyword}"
+            counts = {
+                ("sfbay", "python"): 3,
+                ("sfbay", "java"): 2,
+                ("losangeles", "python"): 4,
+                ("losangeles", "java"): 1,
+            }
+            count = counts.get((region, keyword), 0)
+            return [("2025-11-03T10:00:00Z", region, keyword, f"Job {i}", "$100k", region, f"{base_url}/{i}")
+                    for i in range(count)]
+
+        mock_process.side_effect = mock_process_impl
+
+        # Drain the generator; its return value arrives on StopIteration.value
+        gen = fetch_listings()
+        result = None
+        try:
+            while True:
+                # Yielded progress messages are not asserted on; discard them.
+                next(gen)
+        except StopIteration as e:
+            result = e.value
+
+        # Verify per-search counts
+        assert result is not None
+        by_search = result.get("by_search", [])
+        assert len(by_search) == 4
+
+        search_data = {(r.get("region"), r.get("keyword")): r.get("count") for r in by_search}
+        assert search_data.get(("sfbay", "python")) == 3
+        assert search_data.get(("sfbay", "java")) == 2
+        assert search_data.get(("losangeles", "python")) == 4
+        assert search_data.get(("losangeles", "java")) == 1
+        assert result.get("discovered") == 10  # Total unique jobs
--- a/tests/test_scraper.py
+++ b/tests/test_scraper.py
@@ -0,0 +1,384 @@
+import pytest
+from web.scraper import scrape_job_page, extract_contact_info
+from web.craigslist import process_job_url, scraper
+
+
+def _make_negative_job(url: str) -> dict:
+    return {
+        "url": url,
+        "title": "SCAM role",
+        "company": "Test Co",
+        "location": "Remote",
+        "description": "This is a scam offer",
+        "id": "job123",
+        "posted_time": "",
+        "reply_url": "N/A",
+        "contact_email": "N/A",
+        "contact_phone": "N/A",
+        "contact_name": "N/A",
+        "is_negative_match": True,
+        "negative_keyword_match": "scam",
+        "negative_match_field": "title",
+    }
+
+
+class TestExtractContactInfo:
+    """Test suite for contact information extraction."""
+
+    def test_extract_email_from_mailto_link(self):
+        """Test extraction of email from mailto link."""
+        reply_url = "mailto:contact@example.com?subject=Job%20Inquiry"
+        contact_info = extract_contact_info(reply_url)
+
+        assert contact_info["email"] == "contact@example.com"
+        assert contact_info["phone"] == "N/A"
+        assert contact_info["contact_name"] == "N/A"
+
+    def test_extract_phone_from_tel_link(self):
+        """Test extraction of phone from tel link."""
+        reply_url = "tel:+1234567890"
+        contact_info = extract_contact_info(reply_url)
+
+        assert contact_info["email"] == "N/A"
+        assert contact_info["phone"] == "+1234567890"
+        assert contact_info["contact_name"] == "N/A"
+
+    def test_extract_email_from_url_parameter(self):
+        """Test extraction of email from URL query parameters."""
+        reply_url = "https://example.com/contact?email=jobs@company.com&name=John%20Doe"
+        contact_info = extract_contact_info(reply_url)
+
+        assert contact_info["email"] == "jobs@company.com"
+        assert contact_info["contact_name"] == "John Doe"
+
+    def test_extract_phone_from_url_parameter(self):
+        """Test extraction of phone from URL query parameters."""
+        reply_url = "https://example.com/apply?phone=555-1234&email=contact@test.com"
+        contact_info = extract_contact_info(reply_url)
+
+        assert contact_info["phone"] == "555-1234"
+        assert contact_info["email"] == "contact@test.com"
+
+    def test_extract_contact_name_from_url_parameter(self):
+        """Test extraction of contact name from URL query parameters."""
+        reply_url = "https://example.com/reply?name=Alice%20Smith&contact_name=Bob%20Jones"
+        contact_info = extract_contact_info(reply_url)
+
+        # Should prefer contact_name over name
+        assert contact_info["contact_name"] == "Bob Jones"
+
+    def test_extract_all_fields_from_url(self):
+        """Test extraction of all fields from URL parameters."""
+        reply_url = "https://example.com/contact?email=hr@company.com&phone=555-9876&contact_name=Jane%20Doe"
+        contact_info = extract_contact_info(reply_url)
+
+        assert contact_info["email"] == "hr@company.com"
+        assert contact_info["phone"] == "555-9876"
+        assert contact_info["contact_name"] == "Jane Doe"
+
+    def test_handle_empty_reply_url(self):
+        """Test handling of empty reply URL."""
+        contact_info = extract_contact_info("")
+
+        assert contact_info["email"] == "N/A"
+        assert contact_info["phone"] == "N/A"
+        assert contact_info["contact_name"] == "N/A"
+
+    def test_handle_na_reply_url(self):
+        """Test handling of N/A reply URL."""
+        contact_info = extract_contact_info("N/A")
+
+        assert contact_info["email"] == "N/A"
+        assert contact_info["phone"] == "N/A"
+        assert contact_info["contact_name"] == "N/A"
+
+    def test_handle_none_reply_url(self):
+        """Test handling of None reply URL."""
+        contact_info = extract_contact_info(None)
+
+        assert contact_info["email"] == "N/A"
+        assert contact_info["phone"] == "N/A"
+        assert contact_info["contact_name"] == "N/A"
+
+    def test_handle_invalid_url(self):
+        """Test handling of invalid URL (graceful fallback)."""
+        reply_url = "not a valid url at all"
+        contact_info = extract_contact_info(reply_url)
+
+        # Should return all N/A values without crashing
+        assert contact_info["email"] == "N/A"
+        assert contact_info["phone"] == "N/A"
+        assert contact_info["contact_name"] == "N/A"
+
+    def test_multiple_parameter_variations(self):
+        """Test that function finds email despite multiple parameter name variations."""
+        reply_url = "https://example.com/reply?from_email=sender@test.com&other=value"
+        contact_info = extract_contact_info(reply_url)
+
+        assert contact_info["email"] == "sender@test.com"
+
+    def test_telephone_parameter_name(self):
+        """Test extraction using 'telephone' parameter name."""
+        reply_url = "https://example.com/contact?telephone=555-0000"
+        contact_info = extract_contact_info(reply_url)
+
+        assert contact_info["phone"] == "555-0000"
+
+
+class TestScrapeJobPageContactInfo:
+    """Test suite for scrape_job_page contact information extraction."""
+
+    def test_scrape_job_page_includes_contact_fields(self):
+        """Test that scrape_job_page includes contact information in return dict."""
+        html_content = """
+        <html>
+            <h1 class="postingtitle">Software Engineer</h1>
+            <h2 class="company-name">Tech Company</h2>
+            <button class="reply-button" data-href="mailto:jobs@techco.com"></button>
+            <div id="map" data-latitude="37.7749" data-longitude="-122.4194" data-accuracy="rooftop"></div>
+            <section id="postingbody">
+                <p>This is a test job description</p>
+            </section>
+            <div class="postinginfos">
+                <p class="postinginfo">posting id: 12345abc</p>
+                <time class="date timeago" datetime="2025-11-03T10:00:00"></time>
+            </div>
+        </html>
+        """
+
+        job_data = scrape_job_page(html_content, "https://example.com/job/123")
+
+        # Verify all expected keys are present
+        assert "contact_email" in job_data
+        assert "contact_phone" in job_data
+        assert "contact_name" in job_data
+        assert "reply_url" in job_data
+
+    def test_scrape_job_page_extracts_mailto_contact(self):
+        """Test that scrape_job_page correctly extracts email from mailto link."""
+        html_content = """
+        <html>
+            <h1 class="postingtitle">Job Title</h1>
+            <h2 class="company-name">Company</h2>
+            <button class="reply-button" data-href="mailto:hiring@company.com?subject=Application"></button>
+            <div id="map"></div>
+            <section id="postingbody"><p>Job desc</p></section>
+            <div class="postinginfos">
+                <p class="postinginfo">id: xyz</p>
+            </div>
+        </html>
+        """
+
+        job_data = scrape_job_page(html_content, "https://example.com/job/456")
+
+        assert job_data["contact_email"] == "hiring@company.com"
+        assert job_data["reply_url"] == "mailto:hiring@company.com?subject=Application"
+
+    def test_scrape_job_page_no_reply_button(self):
+        """Test scrape_job_page when no reply button is present."""
+        html_content = """
+        <html>
+            <h1 class="postingtitle">Job Title</h1>
+            <h2 class="company-name">Company</h2>
+            <div id="map"></div>
+            <section id="postingbody"><p>Job desc</p></section>
+            <div class="postinginfos">
+                <p class="postinginfo">id: xyz</p>
+            </div>
+        </html>
+        """
+
+        job_data = scrape_job_page(html_content, "https://example.com/job/789")
+
+        # Should have N/A for all contact fields
+        assert job_data["reply_url"] == "N/A"
+        assert job_data["contact_email"] == "N/A"
+        assert job_data["contact_phone"] == "N/A"
+        assert job_data["contact_name"] == "N/A"
+
+    def test_scrape_job_page_with_url_based_reply(self):
+        """Test scrape_job_page with URL-based reply link containing contact info."""
+        html_content = """
+        <html>
+            <h1 class="postingtitle">Manager Position</h1>
+            <h2 class="company-name">BigCorp</h2>
+            <button class="reply-button" data-href="https://apply.bigcorp.com?email=hr@bigcorp.com&name=HR%20Team"></button>
+            <div id="map"></div>
+            <section id="postingbody"><p>Apply now</p></section>
+            <div class="postinginfos">
+                <p class="postinginfo">id: manager123</p>
+            </div>
+        </html>
+        """
+
+        job_data = scrape_job_page(html_content, "https://example.com/job/999")
+
+        assert job_data["contact_email"] == "hr@bigcorp.com"
+        assert job_data["contact_name"] == "HR Team"
+
+    def test_scrape_job_page_negative_keyword_match(self, monkeypatch):
+        """Test that negative keyword detection flags matching jobs."""
+
+        monkeypatch.setattr(
+            "web.scraper.get_negative_keywords", lambda: ["scam"])
+
+        html_content = """
+        <html>
+            <h1 class="postingtitle">Great Opportunity</h1>
+            <h2 class="company-name">SCAM Corp</h2>
+            <section id="postingbody"><p>This is a scam offer</p></section>
+        </html>
+        """
+
+        job_data = scrape_job_page(
+            html_content, "https://example.com/job/negative")
+
+        assert job_data["is_negative_match"] is True
+        assert job_data["negative_keyword_match"] == "scam"
+        assert job_data["negative_match_field"] in {
+            "title", "company", "description"}
+
+    def test_scrape_job_page_no_negative_match(self, monkeypatch):
+        """Test that jobs without matching keywords are not flagged."""
+
+        monkeypatch.setattr(
+            "web.scraper.get_negative_keywords", lambda: ["scam"])
+
+        html_content = """
+        <html>
+            <h1 class="postingtitle">Legit Opportunity</h1>
+            <h2 class="company-name">Honest Corp</h2>
+            <section id="postingbody"><p>We pay well and on time.</p></section>
+        </html>
+        """
+
+        job_data = scrape_job_page(
+            html_content, "https://example.com/job/positive")
+
+        assert job_data["is_negative_match"] is False
+        assert job_data["negative_keyword_match"] is None
+        assert job_data["negative_match_field"] is None
+
+
+class TestProcessJobUrlNegativeFiltering:
+    def test_process_job_url_skips_negative_match(self, monkeypatch):
+        job_url = "https://example.com/job/negative"
+        remove_calls = []
+        upsert_calls = []
+
+        monkeypatch.setattr(
+            "web.craigslist.get_last_fetch_time", lambda url: None)
+        monkeypatch.setattr(
+            "web.craigslist.insert_log",
+            lambda *args, **kwargs: None,
+        )
+        monkeypatch.setattr(
+            "web.craigslist.make_request_with_retry",
+            lambda url, attempts: "<html />",
+        )
+        monkeypatch.setattr(
+            "web.craigslist.scrape_job_page",
+            lambda content, url: _make_negative_job(url),
+        )
+
+        def fake_upsert(job_data, region="", keyword=""):
+            upsert_calls.append(job_data)
+
+        def fake_remove(url):
+            remove_calls.append(url)
+
+        monkeypatch.setattr("web.craigslist.upsert_job_details", fake_upsert)
+        monkeypatch.setattr("web.craigslist.remove_job", fake_remove)
+
+        messages = list(process_job_url(job_url, region="test", keyword="kw"))
+
+        assert any("Skipping job" in message for message in messages)
+        assert remove_calls == [job_url]
+        assert upsert_calls == []
+
+
+class TestScraperPipelineNegativeFiltering:
+    def test_scraper_skips_negative_jobs(self, monkeypatch):
+        job_url = "https://example.com/job/negative"
+        remove_calls = []
+        upsert_calls = []
+
+        monkeypatch.setattr("web.craigslist.db_init", lambda: None)
+
+        def fake_fetch_listings():
+            yield "Fake listing fetch\n"
+            return {"discovered": 0, "new": 0, "by_search": [], "new_jobs": []}
+
+        monkeypatch.setattr("web.craigslist.fetch_listings",
+                            fake_fetch_listings)
+        monkeypatch.setattr(
+            "web.craigslist.db_get_all_job_urls",
+            lambda: [{"url": job_url, "region": "reg", "keyword": "kw"}],
+        )
+        monkeypatch.setattr(
+            "web.craigslist.get_last_fetch_time", lambda url: None)
+        monkeypatch.setattr("web.craigslist.insert_log",
+                            lambda *args, **kwargs: None)
+        monkeypatch.setattr(
+            "web.craigslist.make_request_with_retry", lambda url, attempts: "<html />"
+        )
+        monkeypatch.setattr("web.craigslist.url_to_job_id",
+                            lambda url: "job123")
+        monkeypatch.setattr(
+            "web.craigslist.scrape_job_page",
+            lambda content, url: _make_negative_job(url),
+        )
+
+        def fake_upsert(job_data, region="", keyword=""):
+            upsert_calls.append(job_data)
+
+        def fake_remove(url):
+            remove_calls.append(url)
+
+        monkeypatch.setattr("web.craigslist.upsert_job_details", fake_upsert)
+        monkeypatch.setattr("web.craigslist.remove_job", fake_remove)
+
+        messages = list(scraper())
+
+        assert any("Skipping job" in message for message in messages)
+        assert remove_calls == [job_url]
+        assert upsert_calls == []
+
+
+class TestScraperEmailNotifications:
+    def test_scraper_sends_email_for_new_jobs(self, monkeypatch):
+        monkeypatch.setattr("web.craigslist.db_init", lambda: None)
+
+        new_jobs = [
+            {
+                "title": "Python Developer",
+                "company": "Acme",
+                "location": "Remote",
+                "url": "https://example.com/jobs/1",
+            }
+        ]
+
+        def fake_fetch_listings():
+            yield "Fake listing fetch\n"
+            return {
+                "discovered": 1,
+                "new": 1,
+                "by_search": [],
+                "new_jobs": new_jobs,
+            }
+
+        monkeypatch.setattr("web.craigslist.fetch_listings", fake_fetch_listings)
+        monkeypatch.setattr("web.craigslist.db_get_all_job_urls", lambda: [])
+
+        calls = {}
+
+        def fake_send_alert(jobs):
+            calls["jobs"] = jobs
+            return True, "sent"
+
+        monkeypatch.setattr("web.craigslist._send_new_job_alert", fake_send_alert)
+
+        messages = list(scraper())
+
+        assert calls["jobs"] == new_jobs
+        assert any("Job alert email sent." in message for message in messages)
--- a/tests/test_user_negative_keywords.py
+++ b/tests/test_user_negative_keywords.py
@@ -0,0 +1,148 @@
+import pytest
+from web.db import (
+    db_init,
+    create_or_update_user,
+    upsert_negative_keyword,
+    set_user_negative_keywords,
+    get_user_negative_keywords,
+    upsert_listing,
+    upsert_job_details,
+    get_all_jobs,
+    UserNegativeKeyword,
+    NegativeKeyword
+)
+from web.app import app
+from web.utils import filter_jobs
+
+
+@pytest.fixture
+def client():
+    """Yield a test client for ``app`` with TESTING on and CSRF checks off.
+
+    ``db_init()`` runs inside an application context before the client is
+    handed to the test.
+    """
+    app.config.update(TESTING=True, WTF_CSRF_ENABLED=False)
+    with app.test_client() as test_client:
+        with app.app_context():
+            db_init()
+        yield test_client
+
+
+def test_negative_keyword_db_ops():
+    """Cover upsert, assignment, replacement and clearing of negative keywords."""
+    db_init()
+    user = "test_neg_user"
+    create_or_update_user(user, "password")
+
+    # Upserting the same keyword twice must return the same positive id.
+    first_id = upsert_negative_keyword("scam")
+    assert first_id > 0
+    second_id = upsert_negative_keyword("scam")
+    assert first_id == second_id
+
+    # Assign two keywords and read them back.
+    set_user_negative_keywords(user, ["scam", "unpaid"])
+    stored = get_user_negative_keywords(user)
+    assert len(stored) == 2
+    assert "scam" in stored
+    assert "unpaid" in stored
+
+    # Setting a shorter list replaces the previous assignment.
+    set_user_negative_keywords(user, ["scam"])
+    stored = get_user_negative_keywords(user)
+    assert len(stored) == 1
+    assert "scam" in stored
+    assert "unpaid" not in stored
+
+    # An empty list removes every keyword for the user.
+    set_user_negative_keywords(user, [])
+    stored = get_user_negative_keywords(user)
+    assert len(stored) == 0
+
+
+def test_settings_endpoint(client):
+    """POST negative keywords to ``/settings`` and confirm they were stored."""
+    user = "test_settings_user"
+    create_or_update_user(user, "password")
+
+    # Authenticate through the login form first.
+    credentials = {'username': user, 'password': 'password'}
+    client.post('/login', data=credentials)
+
+    # Submit the settings as a JSON body.
+    payload = {
+        'regions': [],
+        'keywords': [],
+        'negative_keywords': ['spam', 'junk'],
+    }
+    response = client.post('/settings', json=payload)
+    assert response.status_code == 200
+
+    # Both submitted keywords must now be readable for the user.
+    saved = get_user_negative_keywords(user)
+    for expected in ("spam", "junk"):
+        assert expected in saved
+
+
+def test_job_filtering_with_negative_keywords():
+    """``filter_jobs`` must exclude jobs that contain a negative keyword."""
+    sample_jobs = [
+        {"title": "Great Job", "description": "Good pay"},
+        {"title": "Bad Job", "description": "This is a scam"},
+        {"title": "Okay Job", "description": "Average pay"},
+    ]
+
+    # "scam" occurs only in the description of "Bad Job".
+    without_scam = filter_jobs(sample_jobs, negative_keywords=["scam"])
+    assert len(without_scam) == 2
+    remaining_titles = [job['title'] for job in without_scam]
+    assert "Bad Job" not in remaining_titles
+
+    # "pay" occurs in the other two jobs, so only "Bad Job" is expected back.
+    without_pay = filter_jobs(sample_jobs, negative_keywords=["pay"])
+    assert len(without_pay) == 1
+    remaining_titles = [job['title'] for job in without_pay]
+    assert "Bad Job" in remaining_titles
+
+
+def test_jobs_endpoint_filtering(client):
+    """``GET /jobs`` must omit jobs matching the user's negative keywords.
+
+    Two listings with job details are stored, the user logs in through
+    ``/login`` and gets the negative keyword "scam", and the JSON returned
+    by ``/jobs`` is checked by title.
+    """
+    username = "test_filter_user"
+    create_or_update_user(username, "password")
+
+    # One listing plus details per job; only the second description
+    # contains the negative keyword "scam".
+    job_fixtures = [
+        ("http://example.com/1", "1", "Good Python Job",
+         "This is a legit job."),
+        ("http://example.com/2", "2", "Bad Python Job",
+         "This is a scam job."),
+    ]
+    for url, job_id, title, description in job_fixtures:
+        upsert_listing(
+            url=url,
+            region="sfbay",
+            keyword="python",
+            title=title,
+            pay="$100k",
+            location="SF",
+            timestamp="now"
+        )
+        upsert_job_details({
+            "url": url,
+            "id": job_id,
+            "title": title,
+            "description": description
+        })
+
+    # Login
+    client.post('/login', data={'username': username, 'password': 'password'})
+
+    # Set negative keywords
+    set_user_negative_keywords(username, ["scam"])
+
+    # Fetch jobs
+    resp = client.get('/jobs')
+    # Check the status first so a failed request is reported as such rather
+    # than as a TypeError from iterating a missing JSON body.
+    assert resp.status_code == 200
+    data = resp.get_json()
+
+    titles = [j['title'] for j in data]
+    assert "Good Python Job" in titles
+    assert "Bad Python Job" not in titles
--- a/tests/test_utils_config.py
+++ b/tests/test_utils_config.py
@@ -16,3 +16,23 @@ def test_http_settings_helpers():
    assert isinstance(utils.get_backoff_factor(), int)
    assert isinstance(utils.get_min_delay(), int)
    assert isinstance(utils.get_max_delay(), int)
+
+
+def test_negative_keywords_helper():
+    """``utils.get_negative_keywords`` must return a list of lowercase strings."""
+    result = utils.get_negative_keywords()
+    assert isinstance(result, list)
+    for keyword in result:
+        assert isinstance(keyword, str)
+        assert keyword.lower() == keyword
+
+
+def test_email_settings_helper():
+    """Check the shape of the dict returned by ``utils.get_email_settings``."""
+    config = utils.get_email_settings()
+    assert isinstance(config, dict)
+    for required_key in ('enabled', 'from_address'):
+        assert required_key in config
+
+    # The SMTP section must be a dict with a host and an integer port.
+    smtp_config = config.get('smtp')
+    assert isinstance(smtp_config, dict)
+    assert 'host' in smtp_config
+    assert isinstance(smtp_config.get('port'), int)
+
+    assert isinstance(config.get('recipients'), list)