import pytest from web.scraper import scrape_job_page, extract_contact_info from web.craigslist import process_job_url, scraper def _make_negative_job(url: str) -> dict: return { "url": url, "title": "SCAM role", "company": "Test Co", "location": "Remote", "description": "This is a scam offer", "id": "job123", "posted_time": "", "reply_url": "N/A", "contact_email": "N/A", "contact_phone": "N/A", "contact_name": "N/A", "is_negative_match": True, "negative_keyword_match": "scam", "negative_match_field": "title", } class TestExtractContactInfo: """Test suite for contact information extraction.""" def test_extract_email_from_mailto_link(self): """Test extraction of email from mailto link.""" reply_url = "mailto:contact@example.com?subject=Job%20Inquiry" contact_info = extract_contact_info(reply_url) assert contact_info["email"] == "contact@example.com" assert contact_info["phone"] == "N/A" assert contact_info["contact_name"] == "N/A" def test_extract_phone_from_tel_link(self): """Test extraction of phone from tel link.""" reply_url = "tel:+1234567890" contact_info = extract_contact_info(reply_url) assert contact_info["email"] == "N/A" assert contact_info["phone"] == "+1234567890" assert contact_info["contact_name"] == "N/A" def test_extract_email_from_url_parameter(self): """Test extraction of email from URL query parameters.""" reply_url = "https://example.com/contact?email=jobs@company.com&name=John%20Doe" contact_info = extract_contact_info(reply_url) assert contact_info["email"] == "jobs@company.com" assert contact_info["contact_name"] == "John Doe" def test_extract_phone_from_url_parameter(self): """Test extraction of phone from URL query parameters.""" reply_url = "https://example.com/apply?phone=555-1234&email=contact@test.com" contact_info = extract_contact_info(reply_url) assert contact_info["phone"] == "555-1234" assert contact_info["email"] == "contact@test.com" def test_extract_contact_name_from_url_parameter(self): """Test extraction of contact name from URL query parameters.""" reply_url = "https://example.com/reply?name=Alice%20Smith&contact_name=Bob%20Jones" contact_info = extract_contact_info(reply_url) # Should prefer contact_name over name assert contact_info["contact_name"] == "Bob Jones" def test_extract_all_fields_from_url(self): """Test extraction of all fields from URL parameters.""" reply_url = "https://example.com/contact?email=hr@company.com&phone=555-9876&contact_name=Jane%20Doe" contact_info = extract_contact_info(reply_url) assert contact_info["email"] == "hr@company.com" assert contact_info["phone"] == "555-9876" assert contact_info["contact_name"] == "Jane Doe" def test_handle_empty_reply_url(self): """Test handling of empty reply URL.""" contact_info = extract_contact_info("") assert contact_info["email"] == "N/A" assert contact_info["phone"] == "N/A" assert contact_info["contact_name"] == "N/A" def test_handle_na_reply_url(self): """Test handling of N/A reply URL.""" contact_info = extract_contact_info("N/A") assert contact_info["email"] == "N/A" assert contact_info["phone"] == "N/A" assert contact_info["contact_name"] == "N/A" def test_handle_none_reply_url(self): """Test handling of None reply URL.""" contact_info = extract_contact_info(None) assert contact_info["email"] == "N/A" assert contact_info["phone"] == "N/A" assert contact_info["contact_name"] == "N/A" def test_handle_invalid_url(self): """Test handling of invalid URL (graceful fallback).""" reply_url = "not a valid url at all" contact_info = extract_contact_info(reply_url) # Should return all N/A values without crashing assert contact_info["email"] == "N/A" assert contact_info["phone"] == "N/A" assert contact_info["contact_name"] == "N/A" def test_multiple_parameter_variations(self): """Test that function finds email despite multiple parameter name variations.""" reply_url = "https://example.com/reply?from_email=sender@test.com&other=value" contact_info = extract_contact_info(reply_url) assert contact_info["email"] == "sender@test.com" def test_telephone_parameter_name(self): """Test extraction using 'telephone' parameter name.""" reply_url = "https://example.com/contact?telephone=555-0000" contact_info = extract_contact_info(reply_url) assert contact_info["phone"] == "555-0000" class TestScrapeJobPageContactInfo: """Test suite for scrape_job_page contact information extraction.""" def test_scrape_job_page_includes_contact_fields(self): """Test that scrape_job_page includes contact information in return dict.""" html_content = """
This is a test job description
posting id: 12345abc
Job desc
id: xyz
Job desc
id: xyz
Apply now
id: manager123
This is a scam offer
We pay well and on time.