feat: Implement email sending utilities and templates for job notifications
Some checks failed
CI/CD Pipeline / test (push) Failing after 4m9s
- Added email_service.py for sending emails with SMTP configuration.
- Introduced email_templates.py to render job alert email subjects and bodies.
- Enhanced scraper.py to extract contact information from job listings.
- Updated settings.js to handle negative keyword input validation.
- Created email.html and email_templates.html for managing email subscriptions and templates in the admin interface.
- Modified base.html to include links for email alerts and templates.
- Expanded user settings.html to allow management of negative keywords.
- Updated utils.py to include functions for retrieving negative keywords and email settings.
- Enhanced job filtering logic to exclude jobs containing negative keywords.
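Taken together, the tests in this commit pin down a small pipeline: render_job_alert_email builds a subject/body pair from a list of job dicts, and send_email delivers it using a settings dict. A rough sketch of how the two helpers are expected to compose, inferred from the tests below (addresses and credentials are placeholders, not values from the codebase):

# Hypothetical wiring of the two new helpers, inferred from the tests below.
from web.email_templates import render_job_alert_email
from web.email_service import send_email

jobs = [{"title": "Python Developer", "company": "Acme",
         "location": "Remote", "url": "https://example.com/jobs/1"}]

rendered = render_job_alert_email(jobs, region="sfbay", keyword="python")

# settings shape taken from test_email_service.py; values are placeholders
settings = {
    "enabled": True,
    "from_address": "jobs@example.com",
    "smtp": {"host": "smtp.example.com", "port": 587,
             "timeout": 15, "username": "jobs", "password": "secret",
             "use_tls": True, "use_ssl": False},
}

sent = send_email(subject=rendered["subject"], body=rendered["body"],
                  to=["user@example.com"], settings=settings)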
tests/test_admin_email.py (new file, 84 lines)
@@ -0,0 +1,84 @@
import pytest
from sqlalchemy import text

from web.app import app
from web.db import (
    db_init,
    create_or_update_user,
    subscribe_email,
    list_email_subscriptions,
    _ensure_session,
)


@pytest.fixture(scope="function", autouse=True)
def initialize_app():
    app.config.update(TESTING=True, WTF_CSRF_ENABLED=False)
    with app.app_context():
        db_init()
        create_or_update_user("admin", password="secret",
                              is_admin=True, is_active=True)
        # Clear subscriptions before and after each test to avoid leakage
        with _ensure_session() as session:
            session.execute(text("DELETE FROM email_subscriptions"))
            session.commit()
        yield
        with _ensure_session() as session:
            session.execute(text("DELETE FROM email_subscriptions"))
            session.commit()


@pytest.fixture
def client():
    with app.test_client() as test_client:
        with test_client.session_transaction() as sess:
            sess["username"] = "admin"
        yield test_client


@pytest.fixture
def anon_client():
    with app.test_client() as test_client:
        # Ensure no admin session present
        with test_client.session_transaction() as sess:
            sess.pop("username", None)
        yield test_client


def test_admin_emails_requires_admin(anon_client):
    response = anon_client.get("/admin/emails")
    assert response.status_code == 302
    assert "/login" in response.headers.get("Location", "")


def test_admin_emails_lists_subscriptions(client):
    subscribe_email("alice@example.com")
    response = client.get("/admin/emails")
    assert response.status_code == 200
    assert b"alice@example.com" in response.data


def test_admin_emails_can_subscribe(client):
    response = client.post(
        "/admin/emails",
        data={"action": "subscribe", "email": "bob@example.com"},
        follow_redirects=False,
    )
    assert response.status_code == 302
    emails = list_email_subscriptions()
    assert any(sub["email"] == "bob@example.com" and sub["is_active"]
               for sub in emails)


def test_admin_emails_can_unsubscribe(client):
    subscribe_email("carol@example.com")
    response = client.post(
        "/admin/emails",
        data={"action": "unsubscribe", "email": "carol@example.com"},
        follow_redirects=False,
    )
    assert response.status_code == 302
    emails = list_email_subscriptions()
    matching = [sub for sub in emails if sub["email"] == "carol@example.com"]
    assert matching
    assert matching[0]["is_active"] is False
tests/test_admin_email_templates.py (new file, 138 lines)
@@ -0,0 +1,138 @@
import pytest
from sqlalchemy import text

from web.app import app
from web.db import (
    db_init,
    create_or_update_user,
    list_email_templates,
    update_email_template,
    _ensure_session,
    ensure_default_email_template,
)
from web.email_templates import render_job_alert_email


@pytest.fixture(scope="function", autouse=True)
def setup_database():
    app.config.update(TESTING=True, WTF_CSRF_ENABLED=False)
    with app.app_context():
        db_init()
        create_or_update_user("admin", password="secret", is_admin=True, is_active=True)
        with _ensure_session() as session:
            session.execute(text("DELETE FROM email_templates"))
            session.commit()
        ensure_default_email_template()
        yield
        with _ensure_session() as session:
            session.execute(text("DELETE FROM email_templates"))
            session.commit()
        ensure_default_email_template()


@pytest.fixture
def client():
    with app.test_client() as test_client:
        with test_client.session_transaction() as sess:
            sess["username"] = "admin"
        yield test_client


@pytest.fixture
def anon_client():
    with app.test_client() as test_client:
        with test_client.session_transaction() as sess:
            sess.pop("username", None)
        yield test_client


def test_email_templates_requires_admin(anon_client):
    response = anon_client.get("/admin/email-templates")
    assert response.status_code == 302
    assert "/login" in response.headers.get("Location", "")


def test_email_templates_lists_default(client):
    response = client.get("/admin/email-templates")
    assert response.status_code == 200
    assert b"job-alert" in response.data


def test_email_templates_create_update_delete(client):
    # Create
    response = client.post(
        "/admin/email-templates",
        data={
            "action": "create",
            "name": "Daily Summary",
            "slug": "daily-summary",
            "subject": "Summary: {count_label}",
            "body": "Jobs:{jobs_section}",
            "is_active": "on",
        },
        follow_redirects=False,
    )
    assert response.status_code == 302
    templates = list_email_templates()
    assert any(t["slug"] == "daily-summary" for t in templates)

    # Update
    template_row = next(t for t in templates if t["slug"] == "daily-summary")
    response = client.post(
        "/admin/email-templates",
        data={
            "action": "update",
            "template_id": template_row["template_id"],
            "name": "Daily Summary",
            "slug": "daily-summary",
            "subject": "Updated: {count_label}",
            "body": "Updated body {jobs_section}",
        },
        follow_redirects=False,
    )
    assert response.status_code == 302
    updated = list_email_templates()
    updated_row = next(t for t in updated if t["slug"] == "daily-summary")
    assert "Updated:" in updated_row["subject"]

    # Delete
    response = client.post(
        "/admin/email-templates",
        data={
            "action": "delete",
            "template_id": updated_row["template_id"],
        },
        follow_redirects=False,
    )
    assert response.status_code == 302
    slugs = [t["slug"] for t in list_email_templates()]
    assert "daily-summary" not in slugs


def test_email_templates_preview(client):
    templates = list_email_templates()
    job_alert = next(t for t in templates if t["slug"] == "job-alert")
    response = client.get(f"/admin/email-templates?preview_id={job_alert['template_id']}")
    assert response.status_code == 200
    assert b"Preview" in response.data
    assert b"Subject" in response.data


def test_render_job_alert_email_uses_template_override(client):
    templates = list_email_templates()
    job_alert = next(t for t in templates if t["slug"] == "job-alert")
    update_email_template(
        job_alert["template_id"],
        subject="Custom Subject {count}",
        body="Body {jobs_message}",
    )
    rendered = render_job_alert_email([
        {
            "title": "Python Developer",
            "company": "Acme",
            "location": "Remote",
            "url": "https://example.com",
        }
    ])
    assert rendered["subject"].startswith("Custom Subject")
    assert "Python Developer" in rendered["body"]
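The create/update payloads above use str.format-style placeholders such as {count_label} and {jobs_section}, and render_job_alert_email exposes the substituted values under rendered["context"]. A plausible sketch of how an override template is applied, assuming plain str.format substitution over that context dict (the actual mechanism lives in web.email_templates and may differ):

# Sketch only: assumes templates are filled with str.format using the
# same context dict that render_job_alert_email returns.
template = {"subject": "Summary: {count_label}", "body": "Jobs:{jobs_section}"}
context = {"count_label": "2 new jobs", "jobs_section": "\n1. Python Developer ..."}

subject = template["subject"].format(**context)  # "Summary: 2 new jobs"
body = template["body"].format(**context)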
tests/test_db_negative_filtering.py (new file, 21 lines)
@@ -0,0 +1,21 @@
import pytest
import web.db as db


def test_upsert_job_details_skips_negative_match(monkeypatch):
    def fail(*args, **kwargs):  # pragma: no cover - guard against unwanted calls
        raise AssertionError("should not reach database layers when negative")

    monkeypatch.setattr(db, "_ensure_session", fail)
    monkeypatch.setattr(db, "insert_log", fail)

    job_data = {
        "url": "https://example.com/job/neg",
        "id": "neg123",
        "is_negative_match": True,
        "negative_keyword_match": "scam",
        "negative_match_field": "title",
    }

    # Should return early without touching the database helpers.
    db.upsert_job_details(job_data)
tests/test_email_service.py (new file, 106 lines)
@@ -0,0 +1,106 @@
import pytest

from web.email_service import (
    EmailConfigurationError,
    send_email,
)


def test_send_email_disabled(monkeypatch):
    called = {}

    def _fake_smtp(*args, **kwargs):  # pragma: no cover - should not be called
        called["used"] = True
        raise AssertionError(
            "SMTP should not be invoked when email is disabled")

    monkeypatch.setattr("web.email_service.smtplib.SMTP", _fake_smtp)
    monkeypatch.setattr("web.email_service.smtplib.SMTP_SSL", _fake_smtp)

    result = send_email(
        subject="Hi",
        body="Test",
        to="user@example.com",
        settings={"enabled": False},
    )
    assert result is False
    assert called == {}


def test_send_email_sends_message(monkeypatch):
    events = {"starttls": False, "login": None, "sent": None}

    class FakeSMTP:
        def __init__(self, *, host, port, timeout):
            self.host = host
            self.port = port
            self.timeout = timeout

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            return False

        def ehlo(self):
            events.setdefault("ehlo", 0)
            events["ehlo"] += 1

        def starttls(self):
            events["starttls"] = True

        def login(self, username, password):
            events["login"] = (username, password)

        def send_message(self, message, *, from_addr, to_addrs):
            events["sent"] = {
                "from": from_addr,
                "to": tuple(to_addrs),
                "subject": message["Subject"],
            }

    monkeypatch.setattr("web.email_service.smtplib.SMTP", FakeSMTP)
    monkeypatch.setattr("web.email_service.smtplib.SMTP_SSL", FakeSMTP)

    settings = {
        "enabled": True,
        "from_address": "jobs@example.com",
        "smtp": {
            "host": "smtp.example.com",
            "port": 2525,
            "timeout": 15,
            "username": "jobs",
            "password": "secret",
            "use_tls": True,
            "use_ssl": False,
        },
    }

    result = send_email(
        subject="New Jobs",
        body="You have new jobs waiting.",
        to=["a@example.com", "b@example.com"],
        cc="c@example.com",
        bcc=["d@example.com"],
        settings=settings,
    )

    assert result is True
    assert events["starttls"] is True
    assert events["login"] == ("jobs", "secret")
    assert events["sent"] == {
        "from": "jobs@example.com",
        "to": ("a@example.com", "b@example.com", "c@example.com", "d@example.com"),
        "subject": "New Jobs",
    }


def test_send_email_requires_host():
    settings = {
        "enabled": True,
        "from_address": "jobs@example.com",
        "smtp": {"host": "", "port": 587},
    }
    with pytest.raises(EmailConfigurationError):
        send_email(subject="Hi", body="Test",
                   to="user@example.com", settings=settings)
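The three tests above imply the transport behavior without showing it: both smtplib.SMTP and smtplib.SMTP_SSL are patched, starttls is asserted when use_tls is set, and an empty host raises EmailConfigurationError. A minimal sketch of control flow consistent with those assertions; this is not the actual web.email_service code, and the error message is assumed:

import smtplib
from email.message import EmailMessage

from web.email_service import EmailConfigurationError


def _send(subject, body, to_addrs, settings):
    # Sketch: mirrors only what the tests above assert.
    if not settings.get("enabled"):
        return False  # disabled -> no SMTP call at all
    smtp_cfg = settings.get("smtp", {})
    if not smtp_cfg.get("host"):
        raise EmailConfigurationError("SMTP host is required")  # assumed message

    message = EmailMessage()
    message["Subject"] = subject
    message.set_content(body)

    # Choose SSL transport when use_ssl is set, otherwise plain SMTP,
    # upgrading with STARTTLS when use_tls is set.
    cls = smtplib.SMTP_SSL if smtp_cfg.get("use_ssl") else smtplib.SMTP
    with cls(host=smtp_cfg["host"], port=smtp_cfg["port"],
             timeout=smtp_cfg.get("timeout", 30)) as conn:
        if smtp_cfg.get("use_tls"):
            conn.starttls()
        if smtp_cfg.get("username"):
            conn.login(smtp_cfg["username"], smtp_cfg["password"])
        conn.send_message(message, from_addr=settings["from_address"],
                          to_addrs=to_addrs)
    return True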
tests/test_email_templates.py (new file, 40 lines)
@@ -0,0 +1,40 @@
from datetime import datetime

from web.email_templates import render_job_alert_email


def test_render_job_alert_email_with_jobs():
    jobs = [
        {
            "title": "Python Developer",
            "company": "Acme",
            "location": "Remote",
            "url": "https://example.com/jobs/1",
        },
        {
            "title": "Data Engineer",
            "company": "Globex",
            "location": "NYC",
            "url": "https://example.com/jobs/2",
        },
    ]
    ts = datetime(2025, 11, 3, 12, 0)
    rendered = render_job_alert_email(
        jobs, region="sfbay", keyword="python", generated_at=ts)

    assert rendered["subject"] == "2 new jobs (region: sfbay, keyword: python)"
    assert "1. Python Developer" in rendered["body"]
    assert "Generated at 2025-11-03 12:00 UTC." in rendered["body"]
    assert rendered["context"]["count"] == 2
    assert rendered["context"]["jobs_section"].startswith(
        "\n1. Python Developer")


def test_render_job_alert_email_empty():
    ts = datetime(2025, 11, 3, 12, 0)
    rendered = render_job_alert_email([], generated_at=ts)

    assert rendered["subject"] == "No new jobs"
    assert "No jobs matched this alert." in rendered["body"]
    assert rendered["body"].count("Generated at") == 1
    assert rendered["context"]["count"] == 0
@@ -1,7 +1,7 @@
import pytest
import time
from unittest.mock import patch, MagicMock
-from web.craigslist import scrape_jobs_with_retry, run_scheduled_scraping
+from web.craigslist import scrape_jobs_with_retry, run_scheduled_scraping, fetch_listings


class TestScheduler:
@@ -38,3 +38,100 @@ class TestScheduler:
        # This is a basic test to ensure the scheduler can be set up
        from web.craigslist import schedule
        assert schedule is not None

    @patch('web.craigslist.db_get_all_job_urls')
    @patch('web.craigslist.seed_regions_keywords_from_listings')
    @patch('web.craigslist.get_all_regions')
    @patch('web.craigslist.get_all_keywords')
    @patch('web.craigslist.get_last_fetch_time')
    @patch('web.craigslist.process_region_keyword')
    @patch('web.craigslist.upsert_listing')
    @patch('web.craigslist.insert_log')
    def test_fetch_listings_return_structure(self, mock_log, mock_upsert, mock_process, mock_last_fetch,
                                             mock_keywords, mock_regions, mock_seed, mock_db_urls):
        """Test that fetch_listings returns the correct structure with per-search counts."""
        # Setup mocks
        mock_db_urls.return_value = []
        mock_regions.return_value = [{"name": "sfbay"}]
        mock_keywords.return_value = [{"name": "python"}]
        mock_last_fetch.return_value = None  # Never fetched before
        mock_process.return_value = [
            ("2025-11-03T10:00:00Z", "sfbay", "python", "Python Dev",
             "$100k", "San Francisco", "http://example.com/1"),
            ("2025-11-03T10:00:00Z", "sfbay", "python", "Python Dev",
             "$100k", "San Francisco", "http://example.com/2"),
        ]

        # Collect messages and get return value from generator
        gen = fetch_listings()
        messages = []
        result = None
        try:
            while True:
                messages.append(next(gen))
        except StopIteration as e:
            result = e.value

        # Verify return structure
        assert result is not None
        assert "discovered" in result
        assert "new" in result
        assert "by_search" in result
        assert isinstance(result.get("by_search"), list)
        assert result.get("discovered") == 2
        assert result.get("new") == 2

    @patch('web.craigslist.db_get_all_job_urls')
    @patch('web.craigslist.seed_regions_keywords_from_listings')
    @patch('web.craigslist.get_all_regions')
    @patch('web.craigslist.get_all_keywords')
    @patch('web.craigslist.get_last_fetch_time')
    @patch('web.craigslist.process_region_keyword')
    @patch('web.craigslist.upsert_listing')
    @patch('web.craigslist.insert_log')
    def test_fetch_listings_per_search_count(self, mock_log, mock_upsert, mock_process, mock_last_fetch,
                                             mock_keywords, mock_regions, mock_seed, mock_db_urls):
        """Test that fetch_listings correctly counts jobs per search."""
        # Setup mocks
        mock_db_urls.return_value = []
        mock_regions.return_value = [{"name": "sfbay"}, {"name": "losangeles"}]
        mock_keywords.return_value = [{"name": "python"}, {"name": "java"}]
        mock_last_fetch.return_value = None  # Never fetched before

        # Mock process_region_keyword to return different counts for each search
        def mock_process_impl(region, keyword, discovered_urls):
            # Use unique URLs per search to get the total discovered count
            base_url = f"http://example.com/{region}/{keyword}"
            counts = {
                ("sfbay", "python"): 3,
                ("sfbay", "java"): 2,
                ("losangeles", "python"): 4,
                ("losangeles", "java"): 1,
            }
            count = counts.get((region, keyword), 0)
            return [(f"2025-11-03T10:00:00Z", region, keyword, f"Job {i}", "$100k", region, f"{base_url}/{i}")
                    for i in range(count)]

        mock_process.side_effect = mock_process_impl

        # Collect result from generator
        gen = fetch_listings()
        messages = []
        result = None
        try:
            while True:
                messages.append(next(gen))
        except StopIteration as e:
            result = e.value

        # Verify per-search counts
        assert result is not None
        by_search = result.get("by_search", [])
        assert len(by_search) == 4

        search_data = {(r.get("region"), r.get("keyword")): r.get("count") for r in by_search}
        assert search_data.get(("sfbay", "python")) == 3
        assert search_data.get(("sfbay", "java")) == 2
        assert search_data.get(("losangeles", "python")) == 4
        assert search_data.get(("losangeles", "java")) == 1
        assert result.get("discovered") == 10  # Total unique jobs
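Both new scheduler tests drain fetch_listings by hand to capture its return value: per PEP 380, a generator's return statement surfaces as the value attribute of the StopIteration it raises when exhausted. A standalone illustration of that pattern, independent of this codebase:

def gen():
    yield "progress message"
    return {"discovered": 2, "new": 2}  # surfaced via StopIteration.value

g = gen()
messages, result = [], None
try:
    while True:
        messages.append(next(g))
except StopIteration as exc:
    result = exc.value

assert messages == ["progress message"]
assert result == {"discovered": 2, "new": 2}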
tests/test_scraper.py (new file, 384 lines)
@@ -0,0 +1,384 @@
import pytest
from web.scraper import scrape_job_page, extract_contact_info
from web.craigslist import process_job_url, scraper


def _make_negative_job(url: str) -> dict:
    return {
        "url": url,
        "title": "SCAM role",
        "company": "Test Co",
        "location": "Remote",
        "description": "This is a scam offer",
        "id": "job123",
        "posted_time": "",
        "reply_url": "N/A",
        "contact_email": "N/A",
        "contact_phone": "N/A",
        "contact_name": "N/A",
        "is_negative_match": True,
        "negative_keyword_match": "scam",
        "negative_match_field": "title",
    }


class TestExtractContactInfo:
    """Test suite for contact information extraction."""

    def test_extract_email_from_mailto_link(self):
        """Test extraction of email from mailto link."""
        reply_url = "mailto:contact@example.com?subject=Job%20Inquiry"
        contact_info = extract_contact_info(reply_url)

        assert contact_info["email"] == "contact@example.com"
        assert contact_info["phone"] == "N/A"
        assert contact_info["contact_name"] == "N/A"

    def test_extract_phone_from_tel_link(self):
        """Test extraction of phone from tel link."""
        reply_url = "tel:+1234567890"
        contact_info = extract_contact_info(reply_url)

        assert contact_info["email"] == "N/A"
        assert contact_info["phone"] == "+1234567890"
        assert contact_info["contact_name"] == "N/A"

    def test_extract_email_from_url_parameter(self):
        """Test extraction of email from URL query parameters."""
        reply_url = "https://example.com/contact?email=jobs@company.com&name=John%20Doe"
        contact_info = extract_contact_info(reply_url)

        assert contact_info["email"] == "jobs@company.com"
        assert contact_info["contact_name"] == "John Doe"

    def test_extract_phone_from_url_parameter(self):
        """Test extraction of phone from URL query parameters."""
        reply_url = "https://example.com/apply?phone=555-1234&email=contact@test.com"
        contact_info = extract_contact_info(reply_url)

        assert contact_info["phone"] == "555-1234"
        assert contact_info["email"] == "contact@test.com"

    def test_extract_contact_name_from_url_parameter(self):
        """Test extraction of contact name from URL query parameters."""
        reply_url = "https://example.com/reply?name=Alice%20Smith&contact_name=Bob%20Jones"
        contact_info = extract_contact_info(reply_url)

        # Should prefer contact_name over name
        assert contact_info["contact_name"] == "Bob Jones"

    def test_extract_all_fields_from_url(self):
        """Test extraction of all fields from URL parameters."""
        reply_url = "https://example.com/contact?email=hr@company.com&phone=555-9876&contact_name=Jane%20Doe"
        contact_info = extract_contact_info(reply_url)

        assert contact_info["email"] == "hr@company.com"
        assert contact_info["phone"] == "555-9876"
        assert contact_info["contact_name"] == "Jane Doe"

    def test_handle_empty_reply_url(self):
        """Test handling of empty reply URL."""
        contact_info = extract_contact_info("")

        assert contact_info["email"] == "N/A"
        assert contact_info["phone"] == "N/A"
        assert contact_info["contact_name"] == "N/A"

    def test_handle_na_reply_url(self):
        """Test handling of N/A reply URL."""
        contact_info = extract_contact_info("N/A")

        assert contact_info["email"] == "N/A"
        assert contact_info["phone"] == "N/A"
        assert contact_info["contact_name"] == "N/A"

    def test_handle_none_reply_url(self):
        """Test handling of None reply URL."""
        contact_info = extract_contact_info(None)

        assert contact_info["email"] == "N/A"
        assert contact_info["phone"] == "N/A"
        assert contact_info["contact_name"] == "N/A"

    def test_handle_invalid_url(self):
        """Test handling of invalid URL (graceful fallback)."""
        reply_url = "not a valid url at all"
        contact_info = extract_contact_info(reply_url)

        # Should return all N/A values without crashing
        assert contact_info["email"] == "N/A"
        assert contact_info["phone"] == "N/A"
        assert contact_info["contact_name"] == "N/A"

    def test_multiple_parameter_variations(self):
        """Test that function finds email despite multiple parameter name variations."""
        reply_url = "https://example.com/reply?from_email=sender@test.com&other=value"
        contact_info = extract_contact_info(reply_url)

        assert contact_info["email"] == "sender@test.com"

    def test_telephone_parameter_name(self):
        """Test extraction using 'telephone' parameter name."""
        reply_url = "https://example.com/contact?telephone=555-0000"
        contact_info = extract_contact_info(reply_url)

        assert contact_info["phone"] == "555-0000"


class TestScrapeJobPageContactInfo:
    """Test suite for scrape_job_page contact information extraction."""

    def test_scrape_job_page_includes_contact_fields(self):
        """Test that scrape_job_page includes contact information in return dict."""
        html_content = """
        <html>
        <h1 class="postingtitle">Software Engineer</h1>
        <h2 class="company-name">Tech Company</h2>
        <button class="reply-button" data-href="mailto:jobs@techco.com"></button>
        <div id="map" data-latitude="37.7749" data-longitude="-122.4194" data-accuracy="rooftop"></div>
        <section id="postingbody">
        <p>This is a test job description</p>
        </section>
        <div class="postinginfos">
        <p class="postinginfo">posting id: 12345abc</p>
        <time class="date timeago" datetime="2025-11-03T10:00:00"></time>
        </div>
        </html>
        """

        job_data = scrape_job_page(html_content, "https://example.com/job/123")

        # Verify all expected keys are present
        assert "contact_email" in job_data
        assert "contact_phone" in job_data
        assert "contact_name" in job_data
        assert "reply_url" in job_data

    def test_scrape_job_page_extracts_mailto_contact(self):
        """Test that scrape_job_page correctly extracts email from mailto link."""
        html_content = """
        <html>
        <h1 class="postingtitle">Job Title</h1>
        <h2 class="company-name">Company</h2>
        <button class="reply-button" data-href="mailto:hiring@company.com?subject=Application"></button>
        <div id="map"></div>
        <section id="postingbody"><p>Job desc</p></section>
        <div class="postinginfos">
        <p class="postinginfo">id: xyz</p>
        </div>
        </html>
        """

        job_data = scrape_job_page(html_content, "https://example.com/job/456")

        assert job_data["contact_email"] == "hiring@company.com"
        assert job_data["reply_url"] == "mailto:hiring@company.com?subject=Application"

    def test_scrape_job_page_no_reply_button(self):
        """Test scrape_job_page when no reply button is present."""
        html_content = """
        <html>
        <h1 class="postingtitle">Job Title</h1>
        <h2 class="company-name">Company</h2>
        <div id="map"></div>
        <section id="postingbody"><p>Job desc</p></section>
        <div class="postinginfos">
        <p class="postinginfo">id: xyz</p>
        </div>
        </html>
        """

        job_data = scrape_job_page(html_content, "https://example.com/job/789")

        # Should have N/A for all contact fields
        assert job_data["reply_url"] == "N/A"
        assert job_data["contact_email"] == "N/A"
        assert job_data["contact_phone"] == "N/A"
        assert job_data["contact_name"] == "N/A"

    def test_scrape_job_page_with_url_based_reply(self):
        """Test scrape_job_page with URL-based reply link containing contact info."""
        html_content = """
        <html>
        <h1 class="postingtitle">Manager Position</h1>
        <h2 class="company-name">BigCorp</h2>
        <button class="reply-button" data-href="https://apply.bigcorp.com?email=hr@bigcorp.com&name=HR%20Team"></button>
        <div id="map"></div>
        <section id="postingbody"><p>Apply now</p></section>
        <div class="postinginfos">
        <p class="postinginfo">id: manager123</p>
        </div>
        </html>
        """

        job_data = scrape_job_page(html_content, "https://example.com/job/999")

        assert job_data["contact_email"] == "hr@bigcorp.com"
        assert job_data["contact_name"] == "HR Team"

    def test_scrape_job_page_negative_keyword_match(self, monkeypatch):
        """Test that negative keyword detection flags matching jobs."""

        monkeypatch.setattr(
            "web.scraper.get_negative_keywords", lambda: ["scam"])

        html_content = """
        <html>
        <h1 class="postingtitle">Great Opportunity</h1>
        <h2 class="company-name">SCAM Corp</h2>
        <section id="postingbody"><p>This is a scam offer</p></section>
        </html>
        """

        job_data = scrape_job_page(
            html_content, "https://example.com/job/negative")

        assert job_data["is_negative_match"] is True
        assert job_data["negative_keyword_match"] == "scam"
        assert job_data["negative_match_field"] in {
            "title", "company", "description"}

    def test_scrape_job_page_no_negative_match(self, monkeypatch):
        """Test that jobs without matching keywords are not flagged."""

        monkeypatch.setattr(
            "web.scraper.get_negative_keywords", lambda: ["scam"])

        html_content = """
        <html>
        <h1 class="postingtitle">Legit Opportunity</h1>
        <h2 class="company-name">Honest Corp</h2>
        <section id="postingbody"><p>We pay well and on time.</p></section>
        </html>
        """

        job_data = scrape_job_page(
            html_content, "https://example.com/job/positive")

        assert job_data["is_negative_match"] is False
        assert job_data["negative_keyword_match"] is None
        assert job_data["negative_match_field"] is None


class TestProcessJobUrlNegativeFiltering:
    def test_process_job_url_skips_negative_match(self, monkeypatch):
        job_url = "https://example.com/job/negative"
        remove_calls = []
        upsert_calls = []

        monkeypatch.setattr(
            "web.craigslist.get_last_fetch_time", lambda url: None)
        monkeypatch.setattr(
            "web.craigslist.insert_log",
            lambda *args, **kwargs: None,
        )
        monkeypatch.setattr(
            "web.craigslist.make_request_with_retry",
            lambda url, attempts: "<html />",
        )
        monkeypatch.setattr(
            "web.craigslist.scrape_job_page",
            lambda content, url: _make_negative_job(url),
        )

        def fake_upsert(job_data, region="", keyword=""):
            upsert_calls.append(job_data)

        def fake_remove(url):
            remove_calls.append(url)

        monkeypatch.setattr("web.craigslist.upsert_job_details", fake_upsert)
        monkeypatch.setattr("web.craigslist.remove_job", fake_remove)

        messages = list(process_job_url(job_url, region="test", keyword="kw"))

        assert any("Skipping job" in message for message in messages)
        assert remove_calls == [job_url]
        assert upsert_calls == []


class TestScraperPipelineNegativeFiltering:
    def test_scraper_skips_negative_jobs(self, monkeypatch):
        job_url = "https://example.com/job/negative"
        remove_calls = []
        upsert_calls = []

        monkeypatch.setattr("web.craigslist.db_init", lambda: None)

        def fake_fetch_listings():
            yield "Fake listing fetch\n"
            return {"discovered": 0, "new": 0, "by_search": [], "new_jobs": []}

        monkeypatch.setattr("web.craigslist.fetch_listings",
                            fake_fetch_listings)
        monkeypatch.setattr(
            "web.craigslist.db_get_all_job_urls",
            lambda: [{"url": job_url, "region": "reg", "keyword": "kw"}],
        )
        monkeypatch.setattr(
            "web.craigslist.get_last_fetch_time", lambda url: None)
        monkeypatch.setattr("web.craigslist.insert_log",
                            lambda *args, **kwargs: None)
        monkeypatch.setattr(
            "web.craigslist.make_request_with_retry", lambda url, attempts: "<html />"
        )
        monkeypatch.setattr("web.craigslist.url_to_job_id",
                            lambda url: "job123")
        monkeypatch.setattr(
            "web.craigslist.scrape_job_page",
            lambda content, url: _make_negative_job(url),
        )

        def fake_upsert(job_data, region="", keyword=""):
            upsert_calls.append(job_data)

        def fake_remove(url):
            remove_calls.append(url)

        monkeypatch.setattr("web.craigslist.upsert_job_details", fake_upsert)
        monkeypatch.setattr("web.craigslist.remove_job", fake_remove)

        messages = list(scraper())

        assert any("Skipping job" in message for message in messages)
        assert remove_calls == [job_url]
        assert upsert_calls == []


class TestScraperEmailNotifications:
    def test_scraper_sends_email_for_new_jobs(self, monkeypatch):
        monkeypatch.setattr("web.craigslist.db_init", lambda: None)

        new_jobs = [
            {
                "title": "Python Developer",
                "company": "Acme",
                "location": "Remote",
                "url": "https://example.com/jobs/1",
            }
        ]

        def fake_fetch_listings():
            yield "Fake listing fetch\n"
            return {
                "discovered": 1,
                "new": 1,
                "by_search": [],
                "new_jobs": new_jobs,
            }

        monkeypatch.setattr("web.craigslist.fetch_listings", fake_fetch_listings)
        monkeypatch.setattr("web.craigslist.db_get_all_job_urls", lambda: [])

        calls = {}

        def fake_send_alert(jobs):
            calls["jobs"] = jobs
            return True, "sent"

        monkeypatch.setattr("web.craigslist._send_new_job_alert", fake_send_alert)

        messages = list(scraper())

        assert calls["jobs"] == new_jobs
        assert any("Job alert email sent." in message for message in messages)
tests/test_user_negative_keywords.py (new file, 148 lines)
@@ -0,0 +1,148 @@
import pytest
from web.db import (
    db_init,
    create_or_update_user,
    upsert_negative_keyword,
    set_user_negative_keywords,
    get_user_negative_keywords,
    upsert_listing,
    upsert_job_details,
    get_all_jobs,
    UserNegativeKeyword,
    NegativeKeyword
)
from web.app import app
from web.utils import filter_jobs


@pytest.fixture
def client():
    app.config['TESTING'] = True
    app.config['WTF_CSRF_ENABLED'] = False
    with app.test_client() as client:
        with app.app_context():
            db_init()
        yield client


def test_negative_keyword_db_ops():
    db_init()
    username = "test_neg_user"
    create_or_update_user(username, "password")

    # Test upsert
    kid = upsert_negative_keyword("scam")
    assert kid > 0
    kid2 = upsert_negative_keyword("scam")
    assert kid == kid2

    # Test set/get
    set_user_negative_keywords(username, ["scam", "unpaid"])
    nks = get_user_negative_keywords(username)
    assert len(nks) == 2
    assert "scam" in nks
    assert "unpaid" in nks

    # Test update
    set_user_negative_keywords(username, ["scam"])
    nks = get_user_negative_keywords(username)
    assert len(nks) == 1
    assert "scam" in nks
    assert "unpaid" not in nks

    # Test clear
    set_user_negative_keywords(username, [])
    nks = get_user_negative_keywords(username)
    assert len(nks) == 0


def test_settings_endpoint(client):
    username = "test_settings_user"
    create_or_update_user(username, "password")

    # Login
    client.post('/login', data={'username': username, 'password': 'password'})

    # Post settings
    resp = client.post('/settings', json={
        'regions': [],
        'keywords': [],
        'negative_keywords': ['spam', 'junk']
    })
    assert resp.status_code == 200

    # Verify DB
    nks = get_user_negative_keywords(username)
    assert "spam" in nks
    assert "junk" in nks


def test_job_filtering_with_negative_keywords():
    # Setup jobs
    jobs = [
        {"title": "Great Job", "description": "Good pay"},
        {"title": "Bad Job", "description": "This is a scam"},
        {"title": "Okay Job", "description": "Average pay"},
    ]

    # Filter
    filtered = filter_jobs(jobs, negative_keywords=["scam"])
    assert len(filtered) == 2
    assert "Bad Job" not in [j['title'] for j in filtered]

    filtered = filter_jobs(jobs, negative_keywords=["pay"])
    assert len(filtered) == 1
    assert "Bad Job" in [j['title']
                         for j in filtered]  # "scam" job doesn't have "pay"


def test_jobs_endpoint_filtering(client):
    username = "test_filter_user"
    create_or_update_user(username, "password")

    # Setup DB with jobs
    upsert_listing(
        url="http://example.com/1",
        region="sfbay",
        keyword="python",
        title="Good Python Job",
        pay="$100k",
        location="SF",
        timestamp="now"
    )
    upsert_job_details({
        "url": "http://example.com/1",
        "id": "1",
        "title": "Good Python Job",
        "description": "This is a legit job."
    })

    upsert_listing(
        url="http://example.com/2",
        region="sfbay",
        keyword="python",
        title="Bad Python Job",
        pay="$100k",
        location="SF",
        timestamp="now"
    )
    upsert_job_details({
        "url": "http://example.com/2",
        "id": "2",
        "title": "Bad Python Job",
        "description": "This is a scam job."
    })

    # Login
    client.post('/login', data={'username': username, 'password': 'password'})

    # Set negative keywords
    set_user_negative_keywords(username, ["scam"])

    # Fetch jobs
    resp = client.get('/jobs')
    data = resp.get_json()

    titles = [j['title'] for j in data]
    assert "Good Python Job" in titles
    assert "Bad Python Job" not in titles
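The assertions in test_job_filtering_with_negative_keywords fully determine filter_jobs' observable behavior: a job is dropped when any negative keyword appears in its title or description (negative_keywords=["pay"] keeps only "Bad Job"). A sketch consistent with those assertions, assuming case-insensitive substring matching, which the lowercase-keyword helper test below suggests; this is not the actual web.utils code:

# Sketch only: consistent with test_job_filtering_with_negative_keywords.
def filter_jobs(jobs, negative_keywords=None):
    negative_keywords = [kw.lower() for kw in (negative_keywords or [])]
    kept = []
    for job in jobs:
        # Match against title and description, case-insensitively.
        haystack = f"{job.get('title', '')} {job.get('description', '')}".lower()
        if not any(kw in haystack for kw in negative_keywords):
            kept.append(job)
    return kept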
@@ -16,3 +16,23 @@ def test_http_settings_helpers():
    assert isinstance(utils.get_backoff_factor(), int)
    assert isinstance(utils.get_min_delay(), int)
    assert isinstance(utils.get_max_delay(), int)


def test_negative_keywords_helper():
    keywords = utils.get_negative_keywords()
    assert isinstance(keywords, list)
    for kw in keywords:
        assert isinstance(kw, str)
        assert kw == kw.lower()


def test_email_settings_helper():
    settings = utils.get_email_settings()
    assert isinstance(settings, dict)
    assert 'enabled' in settings
    assert 'from_address' in settings
    smtp = settings.get('smtp')
    assert isinstance(smtp, dict)
    assert 'host' in smtp
    assert isinstance(smtp.get('port'), int)
    assert isinstance(settings.get('recipients'), list)