Compare commits

...

43 Commits

Author SHA1 Message Date
f8a5b1b5ef fix: update scrape function to handle HTML response and improve status messages
Some checks failed
CI/CD Pipeline / test (push) Successful in 1m34s
CI/CD Pipeline / build-image (push) Failing after 1m53s
2025-11-30 10:51:16 +01:00
02e3e77f78 fix: update fetch logic to skip jobs fetched within the last 24 hours and adjust retry attempts in scraper
Some checks failed
CI/CD Pipeline / test (push) Failing after 20s
CI/CD Pipeline / build-image (push) Has been skipped
2025-11-28 20:54:39 +01:00
e0bc295936 feat: Enhance CI/CD pipeline with Docker image build and push steps
Some checks failed
CI/CD Pipeline / test (push) Successful in 21s
CI/CD Pipeline / build-image (push) Failing after 1m9s
2025-11-28 19:16:28 +01:00
2185a07ff0 feat: Implement email sending utilities and templates for job notifications
Some checks failed
CI/CD Pipeline / test (push) Failing after 4m9s
- Added email_service.py for sending emails with SMTP configuration.
- Introduced email_templates.py to render job alert email subjects and bodies.
- Enhanced scraper.py to extract contact information from job listings.
- Updated settings.js to handle negative keyword input validation.
- Created email.html and email_templates.html for managing email subscriptions and templates in the admin interface.
- Modified base.html to include links for email alerts and templates.
- Expanded user settings.html to allow management of negative keywords.
- Updated utils.py to include functions for retrieving negative keywords and email settings.
- Enhanced job filtering logic to exclude jobs containing negative keywords.
2025-11-28 18:15:08 +01:00
8afb208985 fix: update .gitignore to include GitHub Copilot files and add TODO.md
Some checks failed
CI/CD Pipeline / test (push) Failing after 3m35s
2025-11-03 19:04:34 +01:00
d9a224fc36 fix: remove redundant checkout step from CI/CD pipeline
Some checks failed
CI/CD Pipeline / test (push) Failing after 32s
2025-11-01 19:47:41 +01:00
1678e1366e fix: remove unnecessary build and push jobs from CI/CD pipeline
Some checks failed
CI/CD Pipeline / test (push) Has been cancelled
2025-11-01 19:47:02 +01:00
fee955f01d fix: update Dockerfile and documentation for APT_CACHER_NG configuration
Some checks failed
CI/CD Pipeline / test (push) Successful in 18s
CI/CD Pipeline / build (push) Successful in 1m3s
CI/CD Pipeline / push (push) Failing after 35s
2025-11-01 19:41:58 +01:00
a51d500777 feat: enhance CI/CD pipeline with build and push steps for Docker images
Some checks failed
CI/CD Pipeline / test (push) Successful in 18s
CI/CD Pipeline / build (push) Successful in 16s
CI/CD Pipeline / push (push) Failing after 5s
2025-11-01 19:04:05 +01:00
2238a286d4 fix: cexclude linting and deployment for now
All checks were successful
CI/CD Pipeline / test (push) Successful in 3m57s
2025-11-01 18:35:07 +01:00
92b6efb550 fix: adjust exponential backoff timing in scrape_jobs_with_retry
Some checks failed
CI/CD Pipeline / test (push) Failing after 42s
CI/CD Pipeline / deploy (push) Has been skipped
2025-11-01 18:31:35 +01:00
f48f5dc036 fix: missing variable in job_details() 2025-11-01 18:24:37 +01:00
053a9988a8 feat: add CI pipeline 2025-11-01 18:07:57 +01:00
504dc8e2b0 implement automated job scraping scheduler with retry logic and logging 2025-11-01 18:00:59 +01:00
8e3a6f4f41 add logs table definition to database schema 2025-11-01 16:10:42 +01:00
5cbb760005 separate javascript for scrape page 2025-09-17 17:12:29 +02:00
2ae1e2058d reorganize imports
removing unused imports
2025-09-17 17:12:16 +02:00
e549fae3f6 fix table setup 2025-09-17 17:11:45 +02:00
c4761c257c fix handling of db result -> dict 2025-09-17 16:01:21 +02:00
b6f9d39ad8 extending logging 2025-09-17 15:53:28 +02:00
94730439a2 extending logging to individual jobs 2025-09-17 15:35:12 +02:00
39900ea564 do not remove stale urls 2025-09-17 15:30:29 +02:00
e26dc9c164 updating db logic adding logging 2025-09-17 15:24:59 +02:00
c4a5ed56b5 adding statistics page for admin 2025-09-14 17:07:05 +02:00
e947520be9 correct db in docker entrypoint 2025-09-14 16:23:00 +02:00
89d38c91cf simpler Dockerfile and compose test 2025-09-14 16:10:11 +02:00
e7c7861b52 change Dockerfile to bookwork 2025-09-14 15:30:10 +02:00
dd9772997d extending Dockerfile with lang and locale settings
changing debian mirror for faster installation
2025-09-14 15:20:00 +02:00
1a3e6ce3de docker compose test file 2025-09-08 18:53:54 +02:00
9e18323e5f adjusting for exsting traefik host 2025-09-08 18:34:10 +02:00
7c743a56cc port change 2025-09-08 17:56:44 +02:00
8a40e7115f adjusting compose file 2025-09-08 17:32:11 +02:00
56315fe147 modified docker-compose.yml 2025-09-08 17:18:11 +02:00
0f5c2fcf31 extending docker setup README 2025-09-08 17:16:53 +02:00
a369972119 fixing docker compose 2025-09-08 15:36:17 +02:00
008eb7906b fix formatting 2025-09-08 14:59:32 +02:00
georg.sinn-schirwitz
5940e1f8b4 Docker functionality 2025-09-08 14:51:04 +02:00
georg.sinn-schirwitz
042a196718 remove caching 2025-09-08 14:44:46 +02:00
georg.sinn-schirwitz
f8e23d0fba reverting fix: errors 2025-09-01 14:04:35 +02:00
georg.sinn-schirwitz
2201185599 fix rowcount 2025-09-01 14:00:26 +02:00
georg.sinn-schirwitz
fe2a579fc4 completing user administration 2025-08-30 18:33:08 +02:00
georg.sinn-schirwitz
7379d3040d refactor imports 2025-08-30 16:19:26 +02:00
georg.sinn-schirwitz
932a85e279 rename password field 2025-08-30 16:19:17 +02:00
49 changed files with 4367 additions and 751 deletions

63
.dockerignore Normal file
View File

@@ -0,0 +1,63 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Virtual environments
.venv/
venv/
ENV/
env/
# IDE
.vscode/
.idea/
*.swp
*.swo
# OS
.DS_Store
Thumbs.db
# Git
.git/
.gitignore
# Logs
logs/
*.log
# Cache
cache/
# Testing
.pytest_cache/
.coverage
htmlcov/
# Documentation
docs/_build/
# Docker
Dockerfile*
docker-compose*.yml
.dockerignore
README-Docker.md
deploy.sh

98
.gitea/workflows/ci.yml Normal file
View File

@@ -0,0 +1,98 @@
name: CI/CD Pipeline
on:
push:
branches: [main]
pull_request:
branches: [main]
jobs:
test:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: Run tests
run: |
python -m pytest tests/ -v
# - name: Run linting
# run: |
# python -m flake8 web/ tests/ --max-line-length=120
build-image:
runs-on: ubuntu-latest
needs: test
env:
DEFAULT_BRANCH: main
REGISTRY_URL: ${{ secrets.REGISTRY_URL }}
REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }}
REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Collect workflow metadata
id: meta
shell: bash
run: |
ref_name="${GITHUB_REF_NAME:-${GITHUB_REF##*/}}"
event_name="${GITHUB_EVENT_NAME:-}"
sha="${GITHUB_SHA:-}"
if [ "$ref_name" = "${DEFAULT_BRANCH:-main}" ]; then
echo "on_default=true" >> "$GITHUB_OUTPUT"
else
echo "on_default=false" >> "$GITHUB_OUTPUT"
fi
echo "ref_name=$ref_name" >> "$GITHUB_OUTPUT"
echo "event_name=$event_name" >> "$GITHUB_OUTPUT"
echo "sha=$sha" >> "$GITHUB_OUTPUT"
- name: Set up QEMU and Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to registry (best-effort)
if: ${{ steps.meta.outputs.on_default == 'true' }}
uses: docker/login-action@v3
continue-on-error: true
with:
registry: ${{ env.REGISTRY_URL }}
username: ${{ env.REGISTRY_USERNAME }}
password: ${{ env.REGISTRY_PASSWORD }}
- name: Build (and optionally push) image
uses: docker/build-push-action@v5
with:
context: .
file: Dockerfile
push: ${{ steps.meta.outputs.on_default == 'true' && steps.meta.outputs.event_name != 'pull_request' && (env.REGISTRY_URL != '' && env.REGISTRY_USERNAME != '' && env.REGISTRY_PASSWORD != '') }}
tags: |
${{ env.REGISTRY_URL }}/allucanget/jobs:latest
${{ env.REGISTRY_URL }}/allucanget/jobs:${{ steps.meta.outputs.sha }}
# deploy:
# runs-on: ubuntu-latest
# needs: test
# if: github.ref == 'refs/heads/main'
# steps:
# - name: Checkout code
# uses: actions/checkout@v4
# - name: Deploy to production
# run: |
# echo "Deploying to production..."
# docker-compose up -d

4
.gitignore vendored
View File

@@ -1,4 +1,3 @@
.github/copilot*
cache/
logs/
@@ -165,3 +164,6 @@ cython_debug/
#.idea/
docs/online.md
.github/copilot*
.github/TODO.md
.vscode/launch.json

51
Dockerfile Normal file
View File

@@ -0,0 +1,51 @@
# Use Python 3.11 slim image
FROM python:3.11-slim-bookworm
# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
ENV FLASK_ENV=production
# Add apt-cacher-ng configuration (if APT_CACHER_NG is set)
RUN if [ -n "$APT_CACHER_NG" ]; then echo 'Acquire::http { Proxy "'"$APT_CACHER_NG"'/"; };' > /etc/apt/apt.conf.d/01proxy; fi
# Install system dependencies
RUN apt-get update && apt-get install -y \
gcc \
default-libmysqlclient-dev \
default-mysql-client \
pkg-config \
curl \
&& rm -rf /var/lib/apt/lists/*
# Create app directory
WORKDIR /app
# Copy requirements first for better caching
COPY requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code
COPY . .
# Create necessary directories
RUN mkdir -p cache logs
# Expose port
EXPOSE 8000
# Health check
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
CMD curl -f http://localhost:8000/ || exit 1
# Copy entrypoint script
COPY docker-entrypoint.sh /usr/local/bin/
RUN chmod +x /usr/local/bin/docker-entrypoint.sh
# Set entrypoint
ENTRYPOINT ["docker-entrypoint.sh"]
# Run gunicorn
CMD ["gunicorn", "--config", "gunicorn.conf.py", "web.app:app"]

409
README-Docker.md Normal file
View File

@@ -0,0 +1,409 @@
# Jobs App - Docker Deployment
This application is a Craigslist job scraper with a Flask web interface.
## Quick Start with Docker
### Prerequisites
- Docker
- Docker Compose
### Deployment
1. **Clone the repository and navigate to the project directory**
```bash
cd /path/to/jobs-app/jobs
```
2. **Start the application**
```bash
docker-compose up --build -d
```
3. **Wait for services to be ready** (about 30 seconds)
```bash
# Check if the app is running
curl http://localhost:8000
```
4. **Access the application**
- Main app: <http://localhost:8000>
- Admin interface: <http://localhost:8000/admin/users>
- Username: `admin`
- Password: `M11ffpgm.`
- Scraper interface: <http://localhost:8000/scrape-page>
## Docker Architecture
### Services
- **jobs-app**: Flask application with Gunicorn WSGI server
- **mysql**: MySQL 8.0 database
### Ports
- 8000: Flask application
- 3306: MySQL database (exposed for external access if needed)
### Volumes
- `mysql_data`: Persistent MySQL data storage
## Configuration
### Environment Variables
- `FLASK_ENV`: Set to `production`
- `FLASK_SECRET`: Secret key for Flask sessions (required)
- `APT_CACHER_NG`: Optional URL for apt-cacher-ng proxy to speed up package downloads (e.g., `http://192.168.88.14:3142`)
### Database Configuration
The database configuration is in `config/settings.json`:
```json
{
"database": {
"mysql": {
"host": "mysql",
"user": "jobs",
"password": "jobdb",
"database": "jobs",
"port": 3306
}
}
}
```
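For illustration, here is a minimal sketch of how this block could be read from Python and turned into a SQLAlchemy connection URL. The `mysql_url` helper is hypothetical — the real connection handling lives in `web/db.py` and may differ:

```python
# Hypothetical helper: load the MySQL block from config/settings.json and
# build a SQLAlchemy URL (the actual logic in web/db.py may differ).
import json
from pathlib import Path

from sqlalchemy import create_engine


def mysql_url(settings_path: str = "config/settings.json") -> str:
    cfg = json.loads(Path(settings_path).read_text(encoding="utf-8"))
    my = cfg["database"]["mysql"]
    return (
        f"mysql+pymysql://{my['user']}:{my['password']}"
        f"@{my['host']}:{my['port']}/{my['database']}"
    )


if __name__ == "__main__":
    # pymysql driver comes from requirements.txt
    engine = create_engine(mysql_url(), future=True)
    print(engine.url)
```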
## Useful Commands
```bash
# View logs
docker-compose logs -f
# Stop services
docker-compose down
# Restart services
docker-compose restart
# Rebuild and restart
docker-compose up --build
# View running containers
docker-compose ps
# Execute commands in the app container
docker-compose exec jobs-app bash
# Check database
docker-compose exec mysql mysql -u jobs -p jobs
```
## Production Considerations
1. **Security**:
- Change default passwords in `docker-compose.yml`
- Use environment variables for secrets
- Configure proper firewall rules
2. **Scaling**:
- Adjust Gunicorn workers in `gunicorn.conf.py`
- Consider using a reverse proxy (nginx)
- Implement proper logging and monitoring
3. **Database**:
- Use external MySQL for production
- Configure backups
- Set up connection pooling
4. **Networking**:
- Use proper domain names
- Configure SSL/TLS
- Set up load balancing if needed
## Troubleshooting
### Common Issues
1. **Port conflicts**: Change ports in `docker-compose.yml`
2. **Database connection**: Ensure MySQL container is healthy
3. **Memory issues**: Increase Docker memory limits
4. **Permission issues**: Check file permissions in mounted volumes
### Logs
```bash
# Application logs
docker-compose logs jobs-app
# Database logs
docker-compose logs mysql
# All logs
docker-compose logs
```
## Development
For development with hot reload:
```bash
# Run in development mode
docker-compose -f docker-compose.dev.yml up --build
```
Create `docker-compose.dev.yml`:
```yaml
version: "3.8"
services:
jobs-app:
build: .
ports:
- "8000:8000"
environment:
- FLASK_ENV=development
volumes:
- .:/app
command: ["flask", "run", "--host", "0.0.0.0", "--port", "8000"]
```
## Coolify Deployment
This application can be deployed on [Coolify](https://coolify.io) using Docker Compose. Coolify provides additional features and management capabilities for containerized applications.
### Coolify Prerequisites
- Coolify instance (self-hosted or cloud)
- Git repository accessible to Coolify
### Coolify-Specific Configuration
#### 1. Environment Variables
Coolify automatically detects environment variables in your `docker-compose.yml` and provides a UI to manage them. Use the following syntax for better integration:
```yaml
services:
jobs-app:
environment:
# Required variables (will show red border if empty)
- FLASK_SECRET=${FLASK_SECRET:?}
# Required with default (prefilled but editable)
- FLASK_ENV=${FLASK_ENV:?production}
# Optional with default
- GUNICORN_WORKERS=${GUNICORN_WORKERS:-4}
```
#### 2. Coolify Magic Environment Variables
Leverage Coolify's dynamic environment variables:
```yaml
services:
jobs-app:
environment:
# Generate FQDN for the application
- SERVICE_FQDN_JOBS_APP
# Generate secure password for admin user
- ADMIN_PASSWORD=${SERVICE_PASSWORD_ADMIN:?M11ffpgm.}
# Generate database credentials
- DB_USER=${SERVICE_USER_DB:?jobs}
- DB_PASSWORD=${SERVICE_PASSWORD_DB:?jobdb}
```
#### 3. Storage Configuration
Coolify supports advanced storage options:
```yaml
services:
jobs-app:
volumes:
# Create empty directories
- type: bind
source: ./cache
target: /app/cache
is_directory: true
- type: bind
source: ./logs
target: /app/logs
is_directory: true
mysql:
volumes:
# Persistent database storage
- mysql_data:/var/lib/mysql
```
#### 4. Health Checks and Service Management
```yaml
services:
jobs-app:
# Exclude from health checks if needed
exclude_from_hc: false
# Labels for Coolify management and Traefik routing
labels:
- coolify.managed=true
- traefik.enable=true
- "traefik.http.routers.jobs-app.rule=Host(`${SERVICE_FQDN_JOBS_APP:-localhost}`)"
- traefik.http.routers.jobs-app.entryPoints=https
- "traefik.http.routers.jobs-app.middlewares=https-redirect"
- "traefik.http.middlewares.https-redirect.redirectscheme.scheme=https"
```
#### 5. Database Configuration for Coolify
Update your `config/settings.json` to use Coolify environment variables:
```json
{
"database": {
"mysql": {
"host": "mysql",
"user": "${DB_USER:-jobs}",
"password": "${DB_PASSWORD:-jobdb}",
"database": "jobs",
"port": 3306
}
}
}
```
### Complete Coolify docker-compose.yml
Here's a complete `docker-compose.yml` optimized for Coolify:
```yaml
version: "3.8"
services:
jobs-app:
build: .
ports:
- "8000:8000"
environment:
# Required environment variables
- FLASK_SECRET=${FLASK_SECRET:?}
- FLASK_ENV=${FLASK_ENV:?production}
# Coolify magic variables
- SERVICE_FQDN_JOBS_APP
- ADMIN_PASSWORD=${SERVICE_PASSWORD_ADMIN:?M11ffpgm.}
- DB_USER=${SERVICE_USER_DB:?jobs}
- DB_PASSWORD=${SERVICE_PASSWORD_DB:?jobdb}
# Optional configuration
- GUNICORN_WORKERS=${GUNICORN_WORKERS:-4}
- APT_CACHER_NG=${APT_CACHER_NG}
volumes:
- type: bind
source: ./cache
target: /app/cache
is_directory: true
- type: bind
source: ./logs
target: /app/logs
is_directory: true
depends_on:
- mysql
labels:
- coolify.managed=true
- traefik.enable=true
- "traefik.http.routers.jobs-app.rule=Host(`${SERVICE_FQDN_JOBS_APP:-localhost}`)"
- traefik.http.routers.jobs-app.entryPoints=https
- "traefik.http.routers.jobs-app.middlewares=https-redirect"
- "traefik.http.middlewares.https-redirect.redirectscheme.scheme=https"
networks:
- jobs-network
restart: unless-stopped
mysql:
image: mysql:8.0
environment:
- MYSQL_ROOT_PASSWORD=${MYSQL_ROOT_PASSWORD:?rootpassword}
- MYSQL_DATABASE=jobs
- MYSQL_USER=${DB_USER:-jobs}
- MYSQL_PASSWORD=${DB_PASSWORD:-jobdb}
ports:
- "3306:3306"
volumes:
- mysql_data:/var/lib/mysql
- ./mysql-init:/docker-entrypoint-initdb.d
networks:
- jobs-network
volumes:
mysql_data:
networks:
jobs-network:
driver: bridge
```
### Coolify Deployment Steps
1. **Connect Repository**: Link your Git repository to Coolify
2. **Create Service Stack**: Choose "Docker Compose" as the build pack
3. **Configure Environment Variables**: Set required variables in Coolify's UI:
- `FLASK_SECRET`: Generate a secure random string
- `FLASK_ENV`: Set to "production"
- `MYSQL_ROOT_PASSWORD`: Set a secure password
4. **Deploy**: Coolify will automatically build and deploy your application
5. **Access**: Use the generated FQDN to access your application
### Coolify Benefits
- **Automatic SSL**: HTTPS certificates are automatically managed
- **Environment Management**: Easy variable management through UI
- **Monitoring**: Built-in logging and health monitoring
- **Scaling**: Easy horizontal scaling
- **Backups**: Automated backup capabilities
- **Security**: Isolated networks and secure defaults
### Troubleshooting Coolify Deployments
1. **Environment Variables**: Check that all required variables are set in Coolify's UI
2. **Build Logs**: Review build logs for any compilation errors
3. **Network Issues**: Ensure services can communicate within the stack
4. **Storage Permissions**: Verify volume permissions are correct
5. **FQDN Configuration**: Check that the generated FQDN is accessible
#### Common Coolify Errors
**Error: "The scheme `domain.sslip.io` isn't valid. It should be either `http`, `https`"**
- **Cause**: Incorrect Traefik router configuration using full URLs instead of hostnames
- **Solution**: Use `SERVICE_FQDN_*` variables (which contain just the domain) in `Host()` rules, not `SERVICE_URL_*` variables
**Example**:
```yaml
# Correct
- "traefik.http.routers.app.rule=Host(`${SERVICE_FQDN_APP:-localhost}`)"
# Incorrect
- "traefik.http.routers.app.rule=Host(`${SERVICE_URL_APP:-localhost}`)"
```
##### Container fails to start with permission denied
- **Cause**: Volume mount permissions in Coolify environment
- **Solution**: Ensure `is_directory: true` is set for bind mounts that need directory creation
For more information, visit the [Coolify Docker Compose documentation](https://coolify.io/docs/knowledge-base/docker/compose).

87
README-Traefik.md Normal file
View File

@@ -0,0 +1,87 @@
# Setting Up Coolify Deployment Behind an Existing Traefik Proxy
This guide explains how to configure your existing Traefik instance (running at `192.168.88.10`) to proxy traffic to a Coolify-deployed jobs-app service running on `192.168.88.13:8001`.
## Prerequisites
- Traefik is running and accessible at `192.168.88.10`
- Your external IP is configured to point to Traefik for domain resolution
- The jobs-app is deployed via Coolify and running on `192.168.88.13:8001`
- You have access to Traefik's configuration files (assuming file-based provider)
## Step 1: Verify Jobs-App Accessibility
Ensure the jobs-app is running and accessible:
```bash
curl http://192.168.88.13:8001
```
You should receive a response from the Flask application.
## Step 2: Configure Traefik
Since Traefik is on a separate machine (`192.168.88.10`) and cannot directly watch the Docker containers on `192.168.88.13`, you'll need to manually configure the routing in Traefik's configuration.
### Option 1: Using Traefik's File Provider
Add the following configuration to your Traefik dynamic configuration file (e.g., `dynamic.yml`):
```yaml
http:
routers:
jobs-app:
rule: "Host(`your-domain.com`)" # Replace with your actual domain
service: jobs-app
entryPoints:
- https # Assuming Traefik handles SSL termination
middlewares:
- https-redirect # Optional: redirect HTTP to HTTPS
services:
jobs-app:
loadBalancer:
servers:
- url: "http://192.168.88.13:8001"
middlewares:
https-redirect:
redirectScheme:
scheme: https
permanent: true
```
### Option 2: Using Docker Labels (if Traefik can access the Docker socket)
If Traefik has access to the Docker socket on `192.168.88.13` (e.g., via network mount or API), the Docker labels in `docker-compose.yml` will automatically configure the routing. No additional configuration is needed.
## Step 3: Reload Traefik Configuration
After updating the configuration, reload Traefik:
```bash
# If using Docker
docker-compose restart traefik
# Or if running directly
systemctl reload traefik
```
## Step 4: Test the Setup
1. Ensure your DNS points `your-domain.com` to your external IP, which routes to Traefik.
2. Visit `https://your-domain.com` in your browser.
3. Traefik should proxy the request to `http://192.168.88.13:8001` and serve the jobs-app.
## Troubleshooting
- **Port not accessible**: Ensure firewall rules allow traffic from `192.168.88.10` to `192.168.88.13:8001`.
- **SSL issues**: If Traefik is not terminating SSL, adjust the `entryPoints` and remove HTTPS redirects.
- **Routing not working**: Check Traefik logs for errors in router/service configuration.
- **Domain mismatch**: Verify the `Host` rule matches your actual domain.
## Notes
- The jobs-app runs on port 8000 internally in the container, exposed on host port 8001.
- If you need to change the external port, update the `ports` mapping in `docker-compose.yml` and the Traefik service URL accordingly.
- For production, consider adding authentication, rate limiting, or other middlewares in Traefik.

231
README.md
View File

@@ -9,11 +9,32 @@ job scraper
- Users can search for job listings by keywords and region
- Selection of job listings based on user preferences
## Requirements
## Architecture Overview
- Database (MySQL/MariaDB)
- Python 3.x
- Required Python packages (see requirements.txt)
The application is built as a modular Flask-based service with clear separation of concerns:
| Layer | Module | Responsibility |
| ----------------------------- | ---------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| **Web UI** | `web/app.py` | Flask application that serves HTML pages, REST endpoints, and admin interfaces (users, taxonomy, health, email management). |
| **Orchestrator** | `web/craigslist.py` | Coordinates the scraping workflow: schedules runs, fetches listings, updates the DB, and triggers email alerts. |
| **Scraper** | `web/scraper.py` | Contains the low-level HTML parsing logic (`scrape_job_data`, `scrape_job_page`, `extract_contact_info`). |
| **Persistence** | `web/db.py` | SQLAlchemy ORM models (`User`, `JobListing`, `JobDescription`, `UserInteraction`, `Region`, `Keyword`, `EmailSubscription`, **`EmailTemplate`**) and helper functions for upserts, queries, and subscription management. |
| **Email Rendering** | `web/email_templates.py` | Renders job-alert emails using a pluggable template system. Supports default placeholders (`{count_label}`, `{scope}`, `{timestamp}`, `{jobs_section}`, `{jobs_message}`) and custom admin-defined templates. |
| **Email Delivery** | `web/email_service.py` | Sends rendered messages via SMTP, handling TLS/SSL, authentication, and graceful disabling. |
| **Configuration** | `config/settings.json` | Centralised JSON config for database, HTTP, scraper options, negative keywords, and email settings. |
| **Static Assets & Templates** | `web/static/`, `web/templates/` | Frontend resources (JS, CSS) and Jinja2 templates for the public UI and admin pages (including the new **Email Templates** management UI). |
| **Scheduler** | `schedule` (used in `web/craigslist.py`) | Runs the scraper automatically at configurable intervals (default hourly). |
| **Testing** | `tests/` | Pytest suite covering scheduler, scraper, DB helpers, email service, and the new admin UI for email subscriptions and templates. |
**Key architectural notes**
- **Email Subscriptions** are stored in the `email_subscriptions` table and managed via `/admin/emails`.
- **Email Templates** are persisted in the new `email_templates` table, editable through `/admin/email-templates`, and used by the alert system.
- The orchestrator (`fetch_listings`) returns a detailed result dict (`discovered`, `new`, `by_search`) that drives UI metrics and health checks.
- Contact information (`reply_url`, `contact_email`, `contact_phone`, `contact_name`) extracted by the scraper is saved in `job_descriptions`.
- Negative keyword filtering is applied early in the pipeline to prevent unwanted listings from reaching the DB or email alerts.
This layered design makes it straightforward to extend the scraper to new sources, swap out the email backend, or add additional admin features without impacting other components.
## Installation
@@ -22,3 +43,205 @@ job scraper
3. Install dependencies
4. Set up environment variables
5. Run the application
## Scheduler Configuration
The application includes an automated scheduler that runs the job scraping process every hour. The scheduler is implemented in `web/craigslist.py` and includes:
- **Automatic Scheduling**: Scraping runs every hour automatically
- **Failure Handling**: Retry logic with exponential backoff (up to 3 attempts)
- **Background Operation**: Runs in a separate daemon thread
- **Graceful Error Recovery**: Continues running even if individual scraping attempts fail
### Scheduler Features
- **Retry Mechanism**: Automatically retries failed scraping attempts
- **Logging**: Comprehensive logging of scheduler operations and failures
- **Testing**: Comprehensive test suite in `tests/test_scheduler.py`
To modify the scheduling interval, edit the `start_scheduler()` function in `web/craigslist.py`.
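For orientation, a minimal sketch of what such an hourly loop with the `schedule` package can look like; the actual `start_scheduler()` implementation may differ in its details:

```python
# Minimal sketch of an hourly scheduler loop built on the `schedule` package;
# the real start_scheduler() in web/craigslist.py may differ.
import time

import schedule


def run_scheduled_scraping():
    # Stand-in for the retry-wrapped scraping call (scrape_jobs_with_retry).
    print("Running scheduled scraping...")


def start_scheduler(interval_hours: int = 1):
    schedule.every(interval_hours).hours.do(run_scheduled_scraping)
    while True:
        schedule.run_pending()
        time.sleep(60)  # poll for due jobs once a minute
```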
## Job Scraping Output
The `fetch_listings()` function in `web/craigslist.py` now provides detailed metrics about each scraping operation. It returns a dictionary containing:
- **discovered**: Total number of unique job URLs discovered across all region/keyword combinations
- **new**: Total number of newly added jobs (jobs not previously in the database)
- **by_search**: List of dictionaries, each containing:
- **region**: The region name for this search
- **keyword**: The keyword used for this search
- **count**: Number of jobs fetched for this specific region/keyword combination
### Example Output
```python
{
"discovered": 150,
"new": 42,
"by_search": [
{"region": "sfbay", "keyword": "python", "count": 25},
{"region": "sfbay", "keyword": "java", "count": 18},
{"region": "losangeles", "keyword": "python", "count": 45},
{"region": "losangeles", "keyword": "java", "count": 62}
]
}
```
This per-search breakdown allows for better monitoring and debugging of the scraping process, enabling identification of searches that may be failing or returning fewer results than expected.
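As a small, illustrative example of consuming that structure (the threshold and message format below are assumptions, not project code):

```python
# Illustrative only: flag region/keyword searches that returned few results,
# based on the documented fetch_listings() result structure.
def report_low_yield(result: dict, threshold: int = 5) -> list[str]:
    warnings = []
    for search in result.get("by_search", []):
        if search["count"] < threshold:
            warnings.append(
                f"{search['region']}/{search['keyword']}: only {search['count']} jobs"
            )
    return warnings


example = {
    "discovered": 150,
    "new": 42,
    "by_search": [{"region": "sfbay", "keyword": "python", "count": 3}],
}
print(report_low_yield(example))  # ['sfbay/python: only 3 jobs']
```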
## Contact Information Extraction
The scraper now automatically extracts contact information from job listing pages:
### Extracted Fields
When scraping individual job listings, the following contact information is extracted and stored:
- **contact_email**: Email address extracted from reply button or contact form links
- **contact_phone**: Phone number extracted from tel links or contact parameters
- **contact_name**: Contact person or department name if available
- **reply_url**: The full reply/contact URL from the job listing
### How Contact Information is Extracted
The `extract_contact_info()` function intelligently parses various types of reply URLs:
1. **Mailto Links**: `mailto:jobs@company.com?subject=...`
- Extracts the email address directly
2. **Phone Links**: `tel:+1234567890`
- Extracts the phone number
3. **URL Parameters**: `https://apply.company.com?email=hr@company.com&phone=555-1234&name=HR%20Team`
- Searches for common parameter names: `email`, `phone`, `contact_name`, etc.
4. **Graceful Fallback**: If contact information cannot be extracted, the fields are set to `"N/A"`
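For illustration, a simplified sketch of this kind of reply-URL parsing with `urllib.parse`; it is not the project's actual `extract_contact_info()`, which may handle more cases:

```python
# Simplified illustration of parsing a reply URL; the real
# extract_contact_info() in web/scraper.py may handle more cases.
from urllib.parse import urlparse, parse_qs


def extract_contact_info(reply_url: str) -> dict:
    info = {"reply_url": reply_url, "contact_email": "N/A",
            "contact_phone": "N/A", "contact_name": "N/A"}
    parsed = urlparse(reply_url)
    if parsed.scheme == "mailto":
        info["contact_email"] = parsed.path
    elif parsed.scheme == "tel":
        info["contact_phone"] = parsed.path
    else:
        params = parse_qs(parsed.query)  # parse_qs already URL-decodes values
        if "email" in params:
            info["contact_email"] = params["email"][0]
        if "phone" in params:
            info["contact_phone"] = params["phone"][0]
        if "name" in params:
            info["contact_name"] = params["name"][0]
    return info


print(extract_contact_info("mailto:hiring@acme.com?subject=Job%20Application"))
```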
### Database Storage
Contact information is stored in the `job_descriptions` table with the following columns:
- `reply_url` (VARCHAR(512)): The complete reply/contact URL
- `contact_email` (VARCHAR(255)): Extracted email address
- `contact_phone` (VARCHAR(255)): Extracted phone number
- `contact_name` (VARCHAR(255)): Extracted contact person/department name
### Example
For a job listing with reply button `mailto:hiring@acme.com?subject=Job%20Application`:
```python
{
"reply_url": "mailto:hiring@acme.com?subject=Job%20Application",
"contact_email": "hiring@acme.com",
"contact_phone": "N/A",
"contact_name": "N/A"
}
```
This contact information is automatically extracted during job page scraping and persisted to the database for easy access and filtering.
## Negative Keyword Filtering
The scraper inspects each job's title, company, location, and description for configurable “negative” keywords. When a keyword matches, the scraped result indicates the match so downstream workflows can skip or flag the job.
### Configuration
Define keywords in `config/settings.json` under `scraper.negative_keywords`. Keywords are matched case-insensitively and should be supplied without surrounding whitespace:
```json
{
"scraper": {
"negative_keywords": ["scam", "mlm", "unpaid"]
}
}
```
### Scrape Output
Each `scrape_job_page` result contains three new fields:
- `is_negative_match`: `True` when any keyword matches
- `negative_keyword_match`: the keyword that triggered the match
- `negative_match_field`: which field (title, company, location, description) contained the keyword
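A hedged sketch of how such a check could populate these three fields, assuming simple case-insensitive substring matching (the real scraper may differ):

```python
# Hedged sketch: case-insensitive substring matching over selected fields,
# producing the three flags described above (the real scraper may differ).
def check_negative_keywords(job: dict, negative_keywords: list[str]) -> dict:
    for field in ("title", "company", "location", "description"):
        text = (job.get(field) or "").lower()
        for keyword in negative_keywords:
            if keyword.lower() in text:
                return {
                    "is_negative_match": True,
                    "negative_keyword_match": keyword,
                    "negative_match_field": field,
                }
    return {
        "is_negative_match": False,
        "negative_keyword_match": None,
        "negative_match_field": None,
    }


print(check_negative_keywords({"title": "Unpaid internship"}, ["scam", "unpaid"]))
```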
### Processing Behavior
- `process_job_url` stops when `is_negative_match` is `True`, yielding a log message and calling `remove_job` so stale results never remain in `job_listings`.
- `upsert_job_details` now returns immediately for negative matches, ensuring `job_descriptions` never stores filtered listings.
- Regression coverage lives in `tests/test_scraper.py::TestScraperPipelineNegativeFiltering` and `tests/test_db_negative_filtering.py::test_upsert_job_details_skips_negative_match`.
Together, these checks mean negative matches are dropped before any persistence and never shown in the UI.
### User-Specific Negative Keywords
In addition to the global negative keywords defined in `settings.json`, users can define their own personal negative keywords via the **Preferences** page (`/settings`).
- **Management**: Users can add new negative keywords and remove existing ones.
- **Filtering**: Jobs matching any of the user's negative keywords are filtered out from the job listings view (`/` and `/jobs`).
- **Validation**: The UI prevents adding duplicate keywords.
- **Storage**: User-specific negative keywords are stored in the database (`negative_keywords` and `user_negative_keywords` tables).
## Email Notifications
Optional job-alert emails are generated whenever the scraper discovers new listings.
### Configuration
Edit `config/settings.json` under the `email` section:
```json
{
"email": {
"enabled": true,
"from_address": "jobs@example.com",
"recipients": ["alerts@example.com"],
"smtp": {
"host": "smtp.example.com",
"port": 587,
"username": "smtp-user",
"password": "secret",
"use_tls": true,
"use_ssl": false,
"timeout": 30
}
}
}
```
- Leave `enabled` set to `false` for local development or when credentials are unavailable.
- Provide at least one recipient; otherwise alerts are skipped with a log message.
- Omit real credentials from source control—inject them via environment variables or a secrets manager in production.
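One way to keep credentials out of the JSON file is to overlay environment variables when the settings are loaded. The variable names below (`SMTP_USERNAME`, `SMTP_PASSWORD`) are illustrative assumptions, not part of the project:

```python
# Illustrative only: overlay SMTP credentials from environment variables so
# they never live in config/settings.json (variable names are assumptions).
import os


def with_env_credentials(email_settings: dict) -> dict:
    smtp = dict(email_settings.get("smtp", {}))
    smtp["username"] = os.environ.get("SMTP_USERNAME", smtp.get("username", ""))
    smtp["password"] = os.environ.get("SMTP_PASSWORD", smtp.get("password", ""))
    return {**email_settings, "smtp": smtp}
```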
### How Alerts Are Sent
- After `fetch_listings()` completes, the scraper gathers new listings and, when configured, renders a plaintext digest via `web.email_templates.render_job_alert_email`.
- Delivery is handled by `web.email_service.send_email`, which supports TLS/SSL SMTP connections and gracefully skips when disabled.
- Success or failure is streamed in the scraper log output (`Job alert email sent.` or the reason for skipping).
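A simplified sketch of how the two helpers fit together; the actual call site lives in the scraper pipeline and may pass additional arguments:

```python
# Simplified sketch of wiring render_job_alert_email and send_email together;
# the real call site in the scraper may differ.
from web.email_templates import render_job_alert_email
from web.email_service import send_email


def send_job_alert(new_jobs, email_settings, region=None, keyword=None):
    if not email_settings.get("enabled") or not email_settings.get("recipients"):
        return False  # mirrors the "skipped with a log message" behaviour
    rendered = render_job_alert_email(new_jobs, region=region, keyword=keyword)
    return send_email(
        subject=rendered["subject"],
        body=rendered["body"],
        to=email_settings["recipients"],
        settings=email_settings,
    )
```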
### Managing Recipients
- Admin users can visit `/admin/emails` to add or deactivate subscription addresses through the web UI.
- Deactivated rows remain in the table so they can be reactivated later; the scraper only mails active recipients.
- The navigation bar exposes an **Email Alerts** link to the management screen after logging in as an admin user.
### Customising Templates
- Use the **Email Templates** admin page (`/admin/email-templates`) to create, edit, preview, or delete alert templates.
- Templates support placeholder tokens such as `{count_label}`, `{scope}`, `{timestamp}`, `{jobs_section}`, and `{jobs_message}`; the UI lists all available tokens.
- Preview renders the selected template with sample data so changes can be reviewed before saving.
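As an illustration of how placeholder tokens are substituted, templates can be rendered with `str.format()`-style replacement; the context values below are sample data, and the real renderer builds them itself:

```python
# Illustration of placeholder substitution; the real renderer constructs the
# context (count_label, scope, timestamp, jobs_section, jobs_message) itself.
template_body = "Hello,\n\n{count_label} ({scope}) as of {timestamp}:{jobs_section}\n"
context = {
    "count_label": "2 new jobs",
    "scope": "region: sfbay, keyword: python",
    "timestamp": "2025-11-03 12:00 UTC",
    "jobs_section": "\n1. Python Developer - Acme - Remote",
    "jobs_message": "",
}
print(template_body.format(**context))
```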
### Tests
- `tests/test_email_templates.py` verifies the rendered subject/body for both populated and empty alerts.
- `tests/test_email_service.py` covers SMTP configuration, disabled mode, and login/send flows using fakes.
- `tests/test_admin_email.py` exercises the admin UI for listing, subscribing, and unsubscribing recipients.
- `tests/test_admin_email_templates.py` verifies CRUD operations and previews for template management.
- `tests/test_scraper.py::TestScraperEmailNotifications` ensures the scraping pipeline invokes the alert sender when new jobs are found.
## Docker Deployment
Please see [README-Docker.md](README-Docker.md) for instructions on deploying the application using Docker.

View File

@@ -9,7 +9,7 @@
}
},
"http": {
"user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:141.0) Gecko/20100101 Firefox/141.0",
"user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:145.0) Gecko/20100101 Firefox/145.0",
"request_timeout": 30,
"max_retries": 3,
"backoff_factor": 2,
@@ -22,7 +22,22 @@
},
"scraper": {
"base_url": "https://{region}.craigslist.org/search/jjj?query={keyword}&sort=rel",
"config_dir": "config"
"config_dir": "config",
"negative_keywords": []
},
"email": {
"enabled": false,
"from_address": "jobs@example.com",
"recipients": [],
"smtp": {
"host": "smtp.example.com",
"port": 587,
"username": "",
"password": "",
"use_tls": true,
"use_ssl": false,
"timeout": 30
}
},
"users": [
{ "username": "anonymous", "is_admin": false, "password": "" },

41
deploy.sh Normal file
View File

@@ -0,0 +1,41 @@
#!/bin/bash
# Deployment script for Jobs App
set -e
echo "🚀 Starting Jobs App deployment..."
# Check if Docker is installed
if ! command -v docker &> /dev/null; then
echo "❌ Docker is not installed. Please install Docker first."
exit 1
fi
# Check if docker-compose is installed
if ! command -v docker-compose &> /dev/null; then
echo "❌ docker-compose is not installed. Please install docker-compose first."
exit 1
fi
echo "📦 Building and starting services..."
docker-compose up --build -d
echo "⏳ Waiting for services to be ready..."
sleep 30
echo "🔍 Checking service health..."
if curl -f http://localhost:8000/ &> /dev/null; then
echo "✅ Jobs App is running successfully!"
echo "🌐 Access the application at: http://localhost:8000"
echo "📊 Admin interface: http://localhost:8000/admin/users (login: admin/M11ffpgm.)"
echo "🔄 Scraper interface: http://localhost:8000/scrape-page"
else
echo "❌ Jobs App failed to start. Check logs with: docker-compose logs"
exit 1
fi
echo "📋 Useful commands:"
echo " View logs: docker-compose logs -f"
echo " Stop services: docker-compose down"
echo " Restart: docker-compose restart"
echo " Rebuild: docker-compose up --build"

40
docker-compose-test.yml Normal file
View File

@@ -0,0 +1,40 @@
version: "3.8"
services:
jobs-app:
build: .
ports:
- "8001:8000"
environment:
# Required environment variables
- FLASK_SECRET="localtest8462851903856136136"
- FLASK_ENV=production
# Coolify magic variables
- SERVICE_FQDN_JOBS_APP=https://jobs.allucanget.biz
- ADMIN_PASSWORD=M11ffpgm.
- DB_USER=jobs
- DB_PASSWORD=jobdb
# Optional configuration
- GUNICORN_WORKERS=4
volumes:
- type: bind
source: ./cache
target: /app/cache
- type: bind
source: ./logs
target: /app/logs
labels:
- coolify.managed=true
- traefik.enable=true
- "traefik.http.routers.jobs-app.rule=Host(`${SERVICE_FQDN_JOBS_APP:-localhost}`)"
- traefik.http.routers.jobs-app.entryPoints=http
- "traefik.http.services.jobs-app.loadbalancer.server.port=8000"
networks:
- jobs-network
restart: unless-stopped
networks:
jobs-network:
driver: bridge

61
docker-compose.yml Normal file
View File

@@ -0,0 +1,61 @@
version: "3.8"
services:
jobs-app:
build: .
ports:
- "8001:8000"
environment:
# Required environment variables
- FLASK_SECRET=${FLASK_SECRET:?}
- FLASK_ENV=${FLASK_ENV:?production}
# Coolify magic variables
- SERVICE_FQDN_JOBS_APP
- ADMIN_PASSWORD=${SERVICE_PASSWORD_ADMIN:?M11ffpgm.}
- DB_USER=${SERVICE_USER_DB:?jobs}
- DB_PASSWORD=${SERVICE_PASSWORD_DB:?jobdb}
# Optional configuration
- GUNICORN_WORKERS=${GUNICORN_WORKERS:-4}
- APT_CACHER_NG=${APT_CACHER_NG}
volumes:
- type: bind
source: ./cache
target: /app/cache
- type: bind
source: ./logs
target: /app/logs
depends_on:
- mysql
labels:
- coolify.managed=true
- traefik.enable=true
- "traefik.http.routers.jobs-app.rule=Host(`${SERVICE_FQDN_JOBS_APP:-localhost}`)"
- traefik.http.routers.jobs-app.entryPoints=http
- "traefik.http.services.jobs-app.loadbalancer.server.port=8000"
networks:
- jobs-network
restart: unless-stopped
mysql:
image: mysql:8.0
environment:
- MYSQL_ROOT_PASSWORD=${MYSQL_ROOT_PASSWORD:?rootpassword}
- MYSQL_DATABASE=jobs
- MYSQL_USER=${DB_USER:-jobs}
- MYSQL_PASSWORD=${DB_PASSWORD:-jobdb}
ports:
- "3306:3306"
volumes:
- mysql_data:/var/lib/mysql
- ./mysql-init:/docker-entrypoint-initdb.d
networks:
- jobs-network
volumes:
mysql_data:
networks:
jobs-network:
driver: bridge

48
docker-entrypoint.sh Normal file
View File

@@ -0,0 +1,48 @@
#!/bin/bash
# Docker entrypoint script for Jobs App
set -e
echo "🚀 Starting Jobs App container..."
# Wait for MySQL to be ready
echo "⏳ Waiting for MySQL to be ready..."
python -c "
import time
import pymysql
while True:
try:
conn = pymysql.connect(
host='192.168.88.37',
user='jobs',
password='jobdb',
database='jobs',
connect_timeout=5
)
conn.close()
print('✅ MySQL is ready!')
break
except pymysql.Error as e:
print(f'MySQL is not ready: {e}, waiting...')
time.sleep(2)
"
# Run database setup
echo "🗄️ Setting up database..."
python setup.py mysql-init
# Seed initial data if needed
echo "🌱 Seeding initial data..."
python -c "
from web.db import db_init
from web.utils import initialize_users_from_settings
db_init()
try:
initialize_users_from_settings()
print('✅ Users seeded successfully')
except Exception as e:
print(f'⚠️ User seeding failed: {e}')
"
echo "🎯 Starting Gunicorn server..."
exec "$@"

38
gunicorn.conf.py Normal file
View File

@@ -0,0 +1,38 @@
# Gunicorn configuration file
import multiprocessing
# Server socket
bind = "0.0.0.0:8000"
backlog = 2048
# Worker processes
workers = multiprocessing.cpu_count() * 2 + 1
worker_class = "sync"
worker_connections = 1000
max_requests = 1000
max_requests_jitter = 50
timeout = 30
keepalive = 2
# Logging
loglevel = "info"
accesslog = "-"
errorlog = "-"
# Process naming
proc_name = "jobs_app"
# Server mechanics
daemon = False
pidfile = "/tmp/gunicorn.pid"
user = None
group = None
tmp_upload_dir = None
# SSL (if needed)
keyfile = None
certfile = None
# Application
wsgi_module = "web.app:app"
callable = "app"

12
main.py
View File

@@ -4,8 +4,20 @@ starts webserver
"""
import web.app as app
import threading
from web.craigslist import start_scheduler
def start_background_scheduler():
"""Start the scheduler in a background thread."""
scheduler_thread = threading.Thread(target=start_scheduler, daemon=True)
scheduler_thread.start()
print("Background scheduler started")
if __name__ == "__main__":
# Start scheduler in background thread
start_background_scheduler()
# start web server
app.main()

View File

@@ -3,6 +3,7 @@ flask
flask-wtf
pytest
requests
schedule
sqlalchemy
pymysql
gunicorn

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python
"""
MySQL utilities for Craigslist project.
MySQL utility script for initializing database and showing row counts.
Usage (PowerShell):
# Ensure MySQL database and tables
@@ -36,15 +36,15 @@ try:
engine = create_engine(url, future=True)
with engine.begin() as conn:
for table in [
"users",
"regions",
"keywords",
"user_regions",
"user_keywords",
"job_listings",
"job_descriptions",
"cached_pages",
"job_listings",
"keywords",
"logs",
"regions",
"users",
"user_interactions",
"user_keywords",
"user_regions",
]:
try:
n = conn.execute(text(f"SELECT COUNT(*) FROM {table}"))

84
tests/test_admin_email.py Normal file
View File

@@ -0,0 +1,84 @@
import pytest
from sqlalchemy import text
from web.app import app
from web.db import (
db_init,
create_or_update_user,
subscribe_email,
list_email_subscriptions,
_ensure_session,
)
@pytest.fixture(scope="function", autouse=True)
def initialize_app():
app.config.update(TESTING=True, WTF_CSRF_ENABLED=False)
with app.app_context():
db_init()
create_or_update_user("admin", password="secret",
is_admin=True, is_active=True)
# Clear subscriptions before and after each test to avoid leakage
with _ensure_session() as session:
session.execute(text("DELETE FROM email_subscriptions"))
session.commit()
yield
with _ensure_session() as session:
session.execute(text("DELETE FROM email_subscriptions"))
session.commit()
@pytest.fixture
def client():
with app.test_client() as test_client:
with test_client.session_transaction() as sess:
sess["username"] = "admin"
yield test_client
@pytest.fixture
def anon_client():
with app.test_client() as test_client:
# Ensure no admin session present
with test_client.session_transaction() as sess:
sess.pop("username", None)
yield test_client
def test_admin_emails_requires_admin(anon_client):
response = anon_client.get("/admin/emails")
assert response.status_code == 302
assert "/login" in response.headers.get("Location", "")
def test_admin_emails_lists_subscriptions(client):
subscribe_email("alice@example.com")
response = client.get("/admin/emails")
assert response.status_code == 200
assert b"alice@example.com" in response.data
def test_admin_emails_can_subscribe(client):
response = client.post(
"/admin/emails",
data={"action": "subscribe", "email": "bob@example.com"},
follow_redirects=False,
)
assert response.status_code == 302
emails = list_email_subscriptions()
assert any(sub["email"] == "bob@example.com" and sub["is_active"]
for sub in emails)
def test_admin_emails_can_unsubscribe(client):
subscribe_email("carol@example.com")
response = client.post(
"/admin/emails",
data={"action": "unsubscribe", "email": "carol@example.com"},
follow_redirects=False,
)
assert response.status_code == 302
emails = list_email_subscriptions()
matching = [sub for sub in emails if sub["email"] == "carol@example.com"]
assert matching
assert matching[0]["is_active"] is False

View File

@@ -0,0 +1,138 @@
import pytest
from sqlalchemy import text
from web.app import app
from web.db import (
db_init,
create_or_update_user,
list_email_templates,
update_email_template,
_ensure_session,
ensure_default_email_template,
)
from web.email_templates import render_job_alert_email
@pytest.fixture(scope="function", autouse=True)
def setup_database():
app.config.update(TESTING=True, WTF_CSRF_ENABLED=False)
with app.app_context():
db_init()
create_or_update_user("admin", password="secret", is_admin=True, is_active=True)
with _ensure_session() as session:
session.execute(text("DELETE FROM email_templates"))
session.commit()
ensure_default_email_template()
yield
with _ensure_session() as session:
session.execute(text("DELETE FROM email_templates"))
session.commit()
ensure_default_email_template()
@pytest.fixture
def client():
with app.test_client() as test_client:
with test_client.session_transaction() as sess:
sess["username"] = "admin"
yield test_client
@pytest.fixture
def anon_client():
with app.test_client() as test_client:
with test_client.session_transaction() as sess:
sess.pop("username", None)
yield test_client
def test_email_templates_requires_admin(anon_client):
response = anon_client.get("/admin/email-templates")
assert response.status_code == 302
assert "/login" in response.headers.get("Location", "")
def test_email_templates_lists_default(client):
response = client.get("/admin/email-templates")
assert response.status_code == 200
assert b"job-alert" in response.data
def test_email_templates_create_update_delete(client):
# Create
response = client.post(
"/admin/email-templates",
data={
"action": "create",
"name": "Daily Summary",
"slug": "daily-summary",
"subject": "Summary: {count_label}",
"body": "Jobs:{jobs_section}",
"is_active": "on",
},
follow_redirects=False,
)
assert response.status_code == 302
templates = list_email_templates()
assert any(t["slug"] == "daily-summary" for t in templates)
# Update
template_row = next(t for t in templates if t["slug"] == "daily-summary")
response = client.post(
"/admin/email-templates",
data={
"action": "update",
"template_id": template_row["template_id"],
"name": "Daily Summary",
"slug": "daily-summary",
"subject": "Updated: {count_label}",
"body": "Updated body {jobs_section}",
},
follow_redirects=False,
)
assert response.status_code == 302
updated = list_email_templates()
updated_row = next(t for t in updated if t["slug"] == "daily-summary")
assert "Updated:" in updated_row["subject"]
# Delete
response = client.post(
"/admin/email-templates",
data={
"action": "delete",
"template_id": updated_row["template_id"],
},
follow_redirects=False,
)
assert response.status_code == 302
slugs = [t["slug"] for t in list_email_templates()]
assert "daily-summary" not in slugs
def test_email_templates_preview(client):
templates = list_email_templates()
job_alert = next(t for t in templates if t["slug"] == "job-alert")
response = client.get(f"/admin/email-templates?preview_id={job_alert['template_id']}")
assert response.status_code == 200
assert b"Preview" in response.data
assert b"Subject" in response.data
def test_render_job_alert_email_uses_template_override(client):
templates = list_email_templates()
job_alert = next(t for t in templates if t["slug"] == "job-alert")
update_email_template(
job_alert["template_id"],
subject="Custom Subject {count}",
body="Body {jobs_message}",
)
rendered = render_job_alert_email([
{
"title": "Python Developer",
"company": "Acme",
"location": "Remote",
"url": "https://example.com",
}
])
assert rendered["subject"].startswith("Custom Subject")
assert "Python Developer" in rendered["body"]

View File

@@ -1,66 +0,0 @@
import os
import tempfile
import pytest
import web.db as db
from web.utils import get_cache_dir
# Skip unless explicitly enabled (MySQL integration expected)
if not os.getenv("RUN_DB_TESTS"):
pytest.skip("Set RUN_DB_TESTS=1 to run cache path integration tests",
allow_module_level=True)
def test_db_sync_inserts_relative_paths(tmp_path, monkeypatch):
# arrange: create a temporary cache dir and a fake html file
cache_dir = tmp_path / "cache"
cache_dir.mkdir()
f = cache_dir / "example.org_path_to_page_123.html"
f.write_text("<html>ok</html>")
# point app at this cache dir
monkeypatch.setenv("PYTEST_CACHE_DIR", str(cache_dir))
# monkeypatch get_cache_dir used by db functions
monkeypatch.setattr('web.utils.get_cache_dir', lambda: str(cache_dir))
# ensure DB initialized
db.db_init()
# act
db.db_sync_cached_pages(str(cache_dir))
# assert: DB contains relative path, not absolute
rows = db.db_get_all_cached_pages()
assert any(r['file_path'] == os.path.relpath(
str(f), start=str(cache_dir)) for r in rows)
def test_normalize_cached_page_paths_converts_absolute(tmp_path, monkeypatch):
cache_dir = tmp_path / "cache"
cache_dir.mkdir()
# create an actual file
f = cache_dir / "site_example_page_1.html"
f.write_text("<html>ok</html>")
monkeypatch.setattr('web.utils.get_cache_dir', lambda: str(cache_dir))
db.db_init()
abs_fp = str(f)
rel_fp = os.path.relpath(abs_fp, start=str(cache_dir))
# Insert an absolute path row directly (simulate legacy data)
with db._ensure_session() as session:
session.execute(
db.text("INSERT INTO cached_pages(file_path, url_guess, last_modified, size_bytes, job_id) VALUES(:fp, :ug, :lm, :sz, :jid)"),
{"fp": abs_fp, "ug": "https://example.org/page1.html",
"lm": None, "sz": 10, "jid": None}
)
session.commit()
# normalize should convert absolute to relative
changed = db.normalize_cached_page_paths()
assert changed >= 1
rows = db.db_get_all_cached_pages()
assert any(r['file_path'] == rel_fp for r in rows)

View File

@@ -1,53 +0,0 @@
import os
import tempfile
from web.app import app
def test_cached_route_serves_file(monkeypatch):
# Create a temporary file in the configured cache dir
cache_dir = os.path.abspath(os.path.join(
os.path.dirname(__file__), '..', 'cache'))
os.makedirs(cache_dir, exist_ok=True)
fd, tmp_path = tempfile.mkstemp(
prefix='test_cached_', suffix='.html', dir=cache_dir)
os.close(fd)
with open(tmp_path, 'w', encoding='utf-8') as f:
f.write('<html><body>cached</body></html>')
# Fake job record returned by get_job_by_id
fake_job = {
'id': 'fake123',
'job_id': 'fake123',
'file_path': os.path.relpath(tmp_path, cache_dir),
'file_path_abs': tmp_path,
}
def fake_get_job_by_id(jid):
if str(jid) in ('fake123',):
return fake_job
return {}
# Patch the symbol imported into web.app
monkeypatch.setattr('web.app.get_job_by_id', fake_get_job_by_id)
# Request route
client = app.test_client()
res = client.get('/cached/fake123')
assert res.status_code == 200
assert b'cached' in res.data
# Cleanup
try:
os.remove(tmp_path)
except Exception:
pass
def test_cached_route_missing(monkeypatch):
def fake_get_job_by_id(jid):
return {}
monkeypatch.setattr('web.app.get_job_by_id', fake_get_job_by_id)
client = app.test_client()
res = client.get('/cached/nope')
assert res.status_code == 404

View File

@@ -1,27 +0,0 @@
import os
from web.db import CachedPage
from web.utils import get_cache_dir
def test_cachedpage_abs_path(tmp_path, monkeypatch):
# Create a fake cache dir and monkeypatch get_cache_dir
fake_cache = tmp_path / 'cache'
fake_cache.mkdir()
monkeypatch.setenv('PYTHONIOENCODING', 'utf-8')
# Patch the symbol used by CachedPage.abs_path (imported into web.db)
monkeypatch.setattr('web.db.get_cache_dir', lambda: str(fake_cache))
# Create a CachedPage instance and set file_path attribute
cp = CachedPage()
setattr(cp, 'file_path', 'subdir/test.html')
# Ensure the computed absolute path joins the fake cache dir
expected = os.path.join(os.path.abspath(
str(fake_cache)), 'subdir/test.html')
assert cp.abs_path == expected
# When file_path is falsy, abs_path should be None
cp2 = CachedPage()
setattr(cp2, 'file_path', None)
assert cp2.abs_path is None

View File

@@ -95,39 +95,6 @@ def test_upsert_listing_details_and_urls(db_ready):
pass
def test_cached_page_upsert_and_get(db_ready):
jid_suffix = unique_suffix()
url = f"https://example.org/it/{jid_suffix}.html"
# Ensure a listing exists for FK relation if enforced
db.upsert_listing(
url=url,
region="it",
keyword="cache",
title=f"IT Cache {jid_suffix}",
pay="N/A",
location="Test City",
timestamp=now_iso(),
)
fp = f"/tmp/integration_{jid_suffix}.html"
db.upsert_cached_page(
file_path=fp,
url_guess=url,
last_modified=now_iso(),
size_bytes=123,
job_id=int(jid_suffix) if jid_suffix.isdigit() else None,
)
row = db.db_get_cache_url(url)
if row is not None:
assert row["url_guess"] == url
# Cleanup
try:
db.remove_cached_page(fp)
db.db_remove_cached_url(url)
db.db_delete_job(jid_suffix)
except Exception:
pass
def test_user_interactions_mark_and_visit(db_ready):
uname = f"it_user_{unique_suffix()}"
db.create_or_update_user(uname, is_active=True)

View File

@@ -0,0 +1,21 @@
import pytest
import web.db as db
def test_upsert_job_details_skips_negative_match(monkeypatch):
def fail(*args, **kwargs): # pragma: no cover - guard against unwanted calls
raise AssertionError("should not reach database layers when negative")
monkeypatch.setattr(db, "_ensure_session", fail)
monkeypatch.setattr(db, "insert_log", fail)
job_data = {
"url": "https://example.com/job/neg",
"id": "neg123",
"is_negative_match": True,
"negative_keyword_match": "scam",
"negative_match_field": "title",
}
# Should return early without touching the database helpers.
db.upsert_job_details(job_data)

106
tests/test_email_service.py Normal file
View File

@@ -0,0 +1,106 @@
import pytest
from web.email_service import (
EmailConfigurationError,
send_email,
)
def test_send_email_disabled(monkeypatch):
called = {}
def _fake_smtp(*args, **kwargs): # pragma: no cover - should not be called
called["used"] = True
raise AssertionError(
"SMTP should not be invoked when email is disabled")
monkeypatch.setattr("web.email_service.smtplib.SMTP", _fake_smtp)
monkeypatch.setattr("web.email_service.smtplib.SMTP_SSL", _fake_smtp)
result = send_email(
subject="Hi",
body="Test",
to="user@example.com",
settings={"enabled": False},
)
assert result is False
assert called == {}
def test_send_email_sends_message(monkeypatch):
events = {"starttls": False, "login": None, "sent": None}
class FakeSMTP:
def __init__(self, *, host, port, timeout):
self.host = host
self.port = port
self.timeout = timeout
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
def ehlo(self):
events.setdefault("ehlo", 0)
events["ehlo"] += 1
def starttls(self):
events["starttls"] = True
def login(self, username, password):
events["login"] = (username, password)
def send_message(self, message, *, from_addr, to_addrs):
events["sent"] = {
"from": from_addr,
"to": tuple(to_addrs),
"subject": message["Subject"],
}
monkeypatch.setattr("web.email_service.smtplib.SMTP", FakeSMTP)
monkeypatch.setattr("web.email_service.smtplib.SMTP_SSL", FakeSMTP)
settings = {
"enabled": True,
"from_address": "jobs@example.com",
"smtp": {
"host": "smtp.example.com",
"port": 2525,
"timeout": 15,
"username": "jobs",
"password": "secret",
"use_tls": True,
"use_ssl": False,
},
}
result = send_email(
subject="New Jobs",
body="You have new jobs waiting.",
to=["a@example.com", "b@example.com"],
cc="c@example.com",
bcc=["d@example.com"],
settings=settings,
)
assert result is True
assert events["starttls"] is True
assert events["login"] == ("jobs", "secret")
assert events["sent"] == {
"from": "jobs@example.com",
"to": ("a@example.com", "b@example.com", "c@example.com", "d@example.com"),
"subject": "New Jobs",
}
def test_send_email_requires_host():
settings = {
"enabled": True,
"from_address": "jobs@example.com",
"smtp": {"host": "", "port": 587},
}
with pytest.raises(EmailConfigurationError):
send_email(subject="Hi", body="Test",
to="user@example.com", settings=settings)

View File

@@ -0,0 +1,40 @@
from datetime import datetime
from web.email_templates import render_job_alert_email
def test_render_job_alert_email_with_jobs():
jobs = [
{
"title": "Python Developer",
"company": "Acme",
"location": "Remote",
"url": "https://example.com/jobs/1",
},
{
"title": "Data Engineer",
"company": "Globex",
"location": "NYC",
"url": "https://example.com/jobs/2",
},
]
ts = datetime(2025, 11, 3, 12, 0)
rendered = render_job_alert_email(
jobs, region="sfbay", keyword="python", generated_at=ts)
assert rendered["subject"] == "2 new jobs (region: sfbay, keyword: python)"
assert "1. Python Developer" in rendered["body"]
assert "Generated at 2025-11-03 12:00 UTC." in rendered["body"]
assert rendered["context"]["count"] == 2
assert rendered["context"]["jobs_section"].startswith(
"\n1. Python Developer")
def test_render_job_alert_email_empty():
ts = datetime(2025, 11, 3, 12, 0)
rendered = render_job_alert_email([], generated_at=ts)
assert rendered["subject"] == "No new jobs"
assert "No jobs matched this alert." in rendered["body"]
assert rendered["body"].count("Generated at") == 1
assert rendered["context"]["count"] == 0

137
tests/test_scheduler.py Normal file
View File

@@ -0,0 +1,137 @@
import pytest
import time
from unittest.mock import patch, MagicMock
from web.craigslist import scrape_jobs_with_retry, run_scheduled_scraping, fetch_listings
class TestScheduler:
def test_scrape_jobs_with_retry_success(self):
"""Test that scrape_jobs_with_retry succeeds on first attempt."""
with patch('web.craigslist.scraper') as mock_scrape:
result = scrape_jobs_with_retry()
assert result is True
mock_scrape.assert_called_once()
def test_scrape_jobs_with_retry_failure(self):
"""Test that scrape_jobs_with_retry handles failures properly."""
with patch('web.craigslist.scraper', side_effect=Exception("Test error")) as mock_scrape:
result = scrape_jobs_with_retry(max_retries=2)
assert result is False
assert mock_scrape.call_count == 2
def test_run_scheduled_scraping(self):
"""Test the scheduled scraping wrapper function."""
with patch('web.craigslist.scrape_jobs_with_retry') as mock_retry:
mock_retry.return_value = True
run_scheduled_scraping()
mock_retry.assert_called_once()
def test_scheduler_import(self):
"""Test that scheduler functions can be imported."""
from web.craigslist import start_scheduler
assert callable(start_scheduler)
@patch('web.craigslist.schedule')
def test_scheduler_setup(self, mock_schedule):
"""Test that scheduler setup works correctly."""
# This is a basic test to ensure the scheduler can be set up
from web.craigslist import schedule
assert schedule is not None
@patch('web.craigslist.db_get_all_job_urls')
@patch('web.craigslist.seed_regions_keywords_from_listings')
@patch('web.craigslist.get_all_regions')
@patch('web.craigslist.get_all_keywords')
@patch('web.craigslist.get_last_fetch_time')
@patch('web.craigslist.process_region_keyword')
@patch('web.craigslist.upsert_listing')
@patch('web.craigslist.insert_log')
def test_fetch_listings_return_structure(self, mock_log, mock_upsert, mock_process, mock_last_fetch,
mock_keywords, mock_regions, mock_seed, mock_db_urls):
"""Test that fetch_listings returns the correct structure with per-search counts."""
# Setup mocks
mock_db_urls.return_value = []
mock_regions.return_value = [{"name": "sfbay"}]
mock_keywords.return_value = [{"name": "python"}]
mock_last_fetch.return_value = None # Never fetched before
mock_process.return_value = [
("2025-11-03T10:00:00Z", "sfbay", "python", "Python Dev",
"$100k", "San Francisco", "http://example.com/1"),
("2025-11-03T10:00:00Z", "sfbay", "python", "Python Dev",
"$100k", "San Francisco", "http://example.com/2"),
]
# Collect messages and get return value from generator
gen = fetch_listings()
messages = []
result = None
try:
while True:
messages.append(next(gen))
except StopIteration as e:
result = e.value
# Verify return structure
assert result is not None
assert "discovered" in result
assert "new" in result
assert "by_search" in result
assert isinstance(result.get("by_search"), list)
assert result.get("discovered") == 2
assert result.get("new") == 2
@patch('web.craigslist.db_get_all_job_urls')
@patch('web.craigslist.seed_regions_keywords_from_listings')
@patch('web.craigslist.get_all_regions')
@patch('web.craigslist.get_all_keywords')
@patch('web.craigslist.get_last_fetch_time')
@patch('web.craigslist.process_region_keyword')
@patch('web.craigslist.upsert_listing')
@patch('web.craigslist.insert_log')
def test_fetch_listings_per_search_count(self, mock_log, mock_upsert, mock_process, mock_last_fetch,
mock_keywords, mock_regions, mock_seed, mock_db_urls):
"""Test that fetch_listings correctly counts jobs per search."""
# Setup mocks
mock_db_urls.return_value = []
mock_regions.return_value = [{"name": "sfbay"}, {"name": "losangeles"}]
mock_keywords.return_value = [{"name": "python"}, {"name": "java"}]
mock_last_fetch.return_value = None # Never fetched before
# Mock process_region_keyword to return different counts for each search
def mock_process_impl(region, keyword, discovered_urls):
# Use unique URLs per search to get the total discovered count
base_url = f"http://example.com/{region}/{keyword}"
counts = {
("sfbay", "python"): 3,
("sfbay", "java"): 2,
("losangeles", "python"): 4,
("losangeles", "java"): 1,
}
count = counts.get((region, keyword), 0)
return [(f"2025-11-03T10:00:00Z", region, keyword, f"Job {i}", "$100k", region, f"{base_url}/{i}")
for i in range(count)]
mock_process.side_effect = mock_process_impl
# Collect result from generator
gen = fetch_listings()
messages = []
result = None
try:
while True:
messages.append(next(gen))
except StopIteration as e:
result = e.value
# Verify per-search counts
assert result is not None
by_search = result.get("by_search", [])
assert len(by_search) == 4
search_data = {(r.get("region"), r.get("keyword")): r.get("count") for r in by_search}
assert search_data.get(("sfbay", "python")) == 3
assert search_data.get(("sfbay", "java")) == 2
assert search_data.get(("losangeles", "python")) == 4
assert search_data.get(("losangeles", "java")) == 1
assert result.get("discovered") == 10 # Total unique jobs

384
tests/test_scraper.py Normal file

@@ -0,0 +1,384 @@
import pytest
from web.scraper import scrape_job_page, extract_contact_info
from web.craigslist import process_job_url, scraper
def _make_negative_job(url: str) -> dict:
return {
"url": url,
"title": "SCAM role",
"company": "Test Co",
"location": "Remote",
"description": "This is a scam offer",
"id": "job123",
"posted_time": "",
"reply_url": "N/A",
"contact_email": "N/A",
"contact_phone": "N/A",
"contact_name": "N/A",
"is_negative_match": True,
"negative_keyword_match": "scam",
"negative_match_field": "title",
}
class TestExtractContactInfo:
"""Test suite for contact information extraction."""
def test_extract_email_from_mailto_link(self):
"""Test extraction of email from mailto link."""
reply_url = "mailto:contact@example.com?subject=Job%20Inquiry"
contact_info = extract_contact_info(reply_url)
assert contact_info["email"] == "contact@example.com"
assert contact_info["phone"] == "N/A"
assert contact_info["contact_name"] == "N/A"
def test_extract_phone_from_tel_link(self):
"""Test extraction of phone from tel link."""
reply_url = "tel:+1234567890"
contact_info = extract_contact_info(reply_url)
assert contact_info["email"] == "N/A"
assert contact_info["phone"] == "+1234567890"
assert contact_info["contact_name"] == "N/A"
def test_extract_email_from_url_parameter(self):
"""Test extraction of email from URL query parameters."""
reply_url = "https://example.com/contact?email=jobs@company.com&name=John%20Doe"
contact_info = extract_contact_info(reply_url)
assert contact_info["email"] == "jobs@company.com"
assert contact_info["contact_name"] == "John Doe"
def test_extract_phone_from_url_parameter(self):
"""Test extraction of phone from URL query parameters."""
reply_url = "https://example.com/apply?phone=555-1234&email=contact@test.com"
contact_info = extract_contact_info(reply_url)
assert contact_info["phone"] == "555-1234"
assert contact_info["email"] == "contact@test.com"
def test_extract_contact_name_from_url_parameter(self):
"""Test extraction of contact name from URL query parameters."""
reply_url = "https://example.com/reply?name=Alice%20Smith&contact_name=Bob%20Jones"
contact_info = extract_contact_info(reply_url)
# Should prefer contact_name over name
assert contact_info["contact_name"] == "Bob Jones"
def test_extract_all_fields_from_url(self):
"""Test extraction of all fields from URL parameters."""
reply_url = "https://example.com/contact?email=hr@company.com&phone=555-9876&contact_name=Jane%20Doe"
contact_info = extract_contact_info(reply_url)
assert contact_info["email"] == "hr@company.com"
assert contact_info["phone"] == "555-9876"
assert contact_info["contact_name"] == "Jane Doe"
def test_handle_empty_reply_url(self):
"""Test handling of empty reply URL."""
contact_info = extract_contact_info("")
assert contact_info["email"] == "N/A"
assert contact_info["phone"] == "N/A"
assert contact_info["contact_name"] == "N/A"
def test_handle_na_reply_url(self):
"""Test handling of N/A reply URL."""
contact_info = extract_contact_info("N/A")
assert contact_info["email"] == "N/A"
assert contact_info["phone"] == "N/A"
assert contact_info["contact_name"] == "N/A"
def test_handle_none_reply_url(self):
"""Test handling of None reply URL."""
contact_info = extract_contact_info(None)
assert contact_info["email"] == "N/A"
assert contact_info["phone"] == "N/A"
assert contact_info["contact_name"] == "N/A"
def test_handle_invalid_url(self):
"""Test handling of invalid URL (graceful fallback)."""
reply_url = "not a valid url at all"
contact_info = extract_contact_info(reply_url)
# Should return all N/A values without crashing
assert contact_info["email"] == "N/A"
assert contact_info["phone"] == "N/A"
assert contact_info["contact_name"] == "N/A"
def test_multiple_parameter_variations(self):
"""Test that function finds email despite multiple parameter name variations."""
reply_url = "https://example.com/reply?from_email=sender@test.com&other=value"
contact_info = extract_contact_info(reply_url)
assert contact_info["email"] == "sender@test.com"
def test_telephone_parameter_name(self):
"""Test extraction using 'telephone' parameter name."""
reply_url = "https://example.com/contact?telephone=555-0000"
contact_info = extract_contact_info(reply_url)
assert contact_info["phone"] == "555-0000"
class TestScrapeJobPageContactInfo:
"""Test suite for scrape_job_page contact information extraction."""
def test_scrape_job_page_includes_contact_fields(self):
"""Test that scrape_job_page includes contact information in return dict."""
html_content = """
<html>
<h1 class="postingtitle">Software Engineer</h1>
<h2 class="company-name">Tech Company</h2>
<button class="reply-button" data-href="mailto:jobs@techco.com"></button>
<div id="map" data-latitude="37.7749" data-longitude="-122.4194" data-accuracy="rooftop"></div>
<section id="postingbody">
<p>This is a test job description</p>
</section>
<div class="postinginfos">
<p class="postinginfo">posting id: 12345abc</p>
<time class="date timeago" datetime="2025-11-03T10:00:00"></time>
</div>
</html>
"""
job_data = scrape_job_page(html_content, "https://example.com/job/123")
# Verify all expected keys are present
assert "contact_email" in job_data
assert "contact_phone" in job_data
assert "contact_name" in job_data
assert "reply_url" in job_data
def test_scrape_job_page_extracts_mailto_contact(self):
"""Test that scrape_job_page correctly extracts email from mailto link."""
html_content = """
<html>
<h1 class="postingtitle">Job Title</h1>
<h2 class="company-name">Company</h2>
<button class="reply-button" data-href="mailto:hiring@company.com?subject=Application"></button>
<div id="map"></div>
<section id="postingbody"><p>Job desc</p></section>
<div class="postinginfos">
<p class="postinginfo">id: xyz</p>
</div>
</html>
"""
job_data = scrape_job_page(html_content, "https://example.com/job/456")
assert job_data["contact_email"] == "hiring@company.com"
assert job_data["reply_url"] == "mailto:hiring@company.com?subject=Application"
def test_scrape_job_page_no_reply_button(self):
"""Test scrape_job_page when no reply button is present."""
html_content = """
<html>
<h1 class="postingtitle">Job Title</h1>
<h2 class="company-name">Company</h2>
<div id="map"></div>
<section id="postingbody"><p>Job desc</p></section>
<div class="postinginfos">
<p class="postinginfo">id: xyz</p>
</div>
</html>
"""
job_data = scrape_job_page(html_content, "https://example.com/job/789")
# Should have N/A for all contact fields
assert job_data["reply_url"] == "N/A"
assert job_data["contact_email"] == "N/A"
assert job_data["contact_phone"] == "N/A"
assert job_data["contact_name"] == "N/A"
def test_scrape_job_page_with_url_based_reply(self):
"""Test scrape_job_page with URL-based reply link containing contact info."""
html_content = """
<html>
<h1 class="postingtitle">Manager Position</h1>
<h2 class="company-name">BigCorp</h2>
<button class="reply-button" data-href="https://apply.bigcorp.com?email=hr@bigcorp.com&name=HR%20Team"></button>
<div id="map"></div>
<section id="postingbody"><p>Apply now</p></section>
<div class="postinginfos">
<p class="postinginfo">id: manager123</p>
</div>
</html>
"""
job_data = scrape_job_page(html_content, "https://example.com/job/999")
assert job_data["contact_email"] == "hr@bigcorp.com"
assert job_data["contact_name"] == "HR Team"
def test_scrape_job_page_negative_keyword_match(self, monkeypatch):
"""Test that negative keyword detection flags matching jobs."""
monkeypatch.setattr(
"web.scraper.get_negative_keywords", lambda: ["scam"])
html_content = """
<html>
<h1 class="postingtitle">Great Opportunity</h1>
<h2 class="company-name">SCAM Corp</h2>
<section id="postingbody"><p>This is a scam offer</p></section>
</html>
"""
job_data = scrape_job_page(
html_content, "https://example.com/job/negative")
assert job_data["is_negative_match"] is True
assert job_data["negative_keyword_match"] == "scam"
assert job_data["negative_match_field"] in {
"title", "company", "description"}
def test_scrape_job_page_no_negative_match(self, monkeypatch):
"""Test that jobs without matching keywords are not flagged."""
monkeypatch.setattr(
"web.scraper.get_negative_keywords", lambda: ["scam"])
html_content = """
<html>
<h1 class="postingtitle">Legit Opportunity</h1>
<h2 class="company-name">Honest Corp</h2>
<section id="postingbody"><p>We pay well and on time.</p></section>
</html>
"""
job_data = scrape_job_page(
html_content, "https://example.com/job/positive")
assert job_data["is_negative_match"] is False
assert job_data["negative_keyword_match"] is None
assert job_data["negative_match_field"] is None
class TestProcessJobUrlNegativeFiltering:
def test_process_job_url_skips_negative_match(self, monkeypatch):
job_url = "https://example.com/job/negative"
remove_calls = []
upsert_calls = []
monkeypatch.setattr(
"web.craigslist.get_last_fetch_time", lambda url: None)
monkeypatch.setattr(
"web.craigslist.insert_log",
lambda *args, **kwargs: None,
)
monkeypatch.setattr(
"web.craigslist.make_request_with_retry",
lambda url, attempts: "<html />",
)
monkeypatch.setattr(
"web.craigslist.scrape_job_page",
lambda content, url: _make_negative_job(url),
)
def fake_upsert(job_data, region="", keyword=""):
upsert_calls.append(job_data)
def fake_remove(url):
remove_calls.append(url)
monkeypatch.setattr("web.craigslist.upsert_job_details", fake_upsert)
monkeypatch.setattr("web.craigslist.remove_job", fake_remove)
messages = list(process_job_url(job_url, region="test", keyword="kw"))
assert any("Skipping job" in message for message in messages)
assert remove_calls == [job_url]
assert upsert_calls == []
class TestScraperPipelineNegativeFiltering:
def test_scraper_skips_negative_jobs(self, monkeypatch):
job_url = "https://example.com/job/negative"
remove_calls = []
upsert_calls = []
monkeypatch.setattr("web.craigslist.db_init", lambda: None)
def fake_fetch_listings():
yield "Fake listing fetch\n"
return {"discovered": 0, "new": 0, "by_search": [], "new_jobs": []}
monkeypatch.setattr("web.craigslist.fetch_listings",
fake_fetch_listings)
monkeypatch.setattr(
"web.craigslist.db_get_all_job_urls",
lambda: [{"url": job_url, "region": "reg", "keyword": "kw"}],
)
monkeypatch.setattr(
"web.craigslist.get_last_fetch_time", lambda url: None)
monkeypatch.setattr("web.craigslist.insert_log",
lambda *args, **kwargs: None)
monkeypatch.setattr(
"web.craigslist.make_request_with_retry", lambda url, attempts: "<html />"
)
monkeypatch.setattr("web.craigslist.url_to_job_id",
lambda url: "job123")
monkeypatch.setattr(
"web.craigslist.scrape_job_page",
lambda content, url: _make_negative_job(url),
)
def fake_upsert(job_data, region="", keyword=""):
upsert_calls.append(job_data)
def fake_remove(url):
remove_calls.append(url)
monkeypatch.setattr("web.craigslist.upsert_job_details", fake_upsert)
monkeypatch.setattr("web.craigslist.remove_job", fake_remove)
messages = list(scraper())
assert any("Skipping job" in message for message in messages)
assert remove_calls == [job_url]
assert upsert_calls == []
class TestScraperEmailNotifications:
def test_scraper_sends_email_for_new_jobs(self, monkeypatch):
monkeypatch.setattr("web.craigslist.db_init", lambda: None)
new_jobs = [
{
"title": "Python Developer",
"company": "Acme",
"location": "Remote",
"url": "https://example.com/jobs/1",
}
]
def fake_fetch_listings():
yield "Fake listing fetch\n"
return {
"discovered": 1,
"new": 1,
"by_search": [],
"new_jobs": new_jobs,
}
monkeypatch.setattr("web.craigslist.fetch_listings", fake_fetch_listings)
monkeypatch.setattr("web.craigslist.db_get_all_job_urls", lambda: [])
calls = {}
def fake_send_alert(jobs):
calls["jobs"] = jobs
return True, "sent"
monkeypatch.setattr("web.craigslist._send_new_job_alert", fake_send_alert)
messages = list(scraper())
assert calls["jobs"] == new_jobs
assert any("Job alert email sent." in message for message in messages)


@@ -0,0 +1,148 @@
import pytest
from web.db import (
db_init,
create_or_update_user,
upsert_negative_keyword,
set_user_negative_keywords,
get_user_negative_keywords,
upsert_listing,
upsert_job_details,
get_all_jobs,
UserNegativeKeyword,
NegativeKeyword
)
from web.app import app
from web.utils import filter_jobs
@pytest.fixture
def client():
app.config['TESTING'] = True
app.config['WTF_CSRF_ENABLED'] = False
with app.test_client() as client:
with app.app_context():
db_init()
yield client
def test_negative_keyword_db_ops():
db_init()
username = "test_neg_user"
create_or_update_user(username, "password")
# Test upsert
kid = upsert_negative_keyword("scam")
assert kid > 0
kid2 = upsert_negative_keyword("scam")
assert kid == kid2
# Test set/get
set_user_negative_keywords(username, ["scam", "unpaid"])
nks = get_user_negative_keywords(username)
assert len(nks) == 2
assert "scam" in nks
assert "unpaid" in nks
# Test update
set_user_negative_keywords(username, ["scam"])
nks = get_user_negative_keywords(username)
assert len(nks) == 1
assert "scam" in nks
assert "unpaid" not in nks
# Test clear
set_user_negative_keywords(username, [])
nks = get_user_negative_keywords(username)
assert len(nks) == 0
def test_settings_endpoint(client):
username = "test_settings_user"
create_or_update_user(username, "password")
# Login
client.post('/login', data={'username': username, 'password': 'password'})
# Post settings
resp = client.post('/settings', json={
'regions': [],
'keywords': [],
'negative_keywords': ['spam', 'junk']
})
assert resp.status_code == 200
# Verify DB
nks = get_user_negative_keywords(username)
assert "spam" in nks
assert "junk" in nks
def test_job_filtering_with_negative_keywords():
# Setup jobs
jobs = [
{"title": "Great Job", "description": "Good pay"},
{"title": "Bad Job", "description": "This is a scam"},
{"title": "Okay Job", "description": "Average pay"},
]
# Filter
filtered = filter_jobs(jobs, negative_keywords=["scam"])
assert len(filtered) == 2
assert "Bad Job" not in [j['title'] for j in filtered]
filtered = filter_jobs(jobs, negative_keywords=["pay"])
assert len(filtered) == 1
assert "Bad Job" in [j['title']
for j in filtered] # "scam" job doesn't have "pay"
def test_jobs_endpoint_filtering(client):
username = "test_filter_user"
create_or_update_user(username, "password")
# Setup DB with jobs
upsert_listing(
url="http://example.com/1",
region="sfbay",
keyword="python",
title="Good Python Job",
pay="$100k",
location="SF",
timestamp="now"
)
upsert_job_details({
"url": "http://example.com/1",
"id": "1",
"title": "Good Python Job",
"description": "This is a legit job."
})
upsert_listing(
url="http://example.com/2",
region="sfbay",
keyword="python",
title="Bad Python Job",
pay="$100k",
location="SF",
timestamp="now"
)
upsert_job_details({
"url": "http://example.com/2",
"id": "2",
"title": "Bad Python Job",
"description": "This is a scam job."
})
# Login
client.post('/login', data={'username': username, 'password': 'password'})
# Set negative keywords
set_user_negative_keywords(username, ["scam"])
# Fetch jobs
resp = client.get('/jobs')
data = resp.get_json()
titles = [j['title'] for j in data]
assert "Good Python Job" in titles
assert "Bad Python Job" not in titles


@@ -16,3 +16,23 @@ def test_http_settings_helpers():
assert isinstance(utils.get_backoff_factor(), int)
assert isinstance(utils.get_min_delay(), int)
assert isinstance(utils.get_max_delay(), int)
def test_negative_keywords_helper():
keywords = utils.get_negative_keywords()
assert isinstance(keywords, list)
for kw in keywords:
assert isinstance(kw, str)
assert kw == kw.lower()
def test_email_settings_helper():
settings = utils.get_email_settings()
assert isinstance(settings, dict)
assert 'enabled' in settings
assert 'from_address' in settings
smtp = settings.get('smtp')
assert isinstance(smtp, dict)
assert 'host' in smtp
assert isinstance(smtp.get('port'), int)
assert isinstance(settings.get('recipients'), list)
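
Read together, these assertions imply that utils.get_email_settings() returns a dict shaped roughly like the sketch below (every value is a placeholder):

expected_shape = {
    "enabled": False,
    "from_address": "jobs@example.com",
    "recipients": [],
    "smtp": {"host": "smtp.example.com", "port": 587},
}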


@@ -1,11 +1,13 @@
import os
from flask import Flask, request, jsonify, render_template, redirect, url_for, session, flash, send_file
from flask import Flask, request, jsonify, render_template, redirect, url_for, session, flash, Response
from flask_wtf import CSRFProtect
from typing import Dict, List
from datetime import datetime, timezone
from web.craigslist import scraper
from web.db import (
db_init,
delete_user_by_id,
get_all_jobs,
mark_favorite,
record_visit,
@@ -13,12 +15,16 @@ from web.db import (
create_or_update_user,
verify_user_credentials,
get_user,
get_user_by_id,
get_user_regions,
get_user_keywords,
get_user_negative_keywords,
set_user_regions,
set_user_keywords,
set_user_negative_keywords,
get_all_regions,
get_all_keywords,
stats_overview,
upsert_region,
upsert_keyword,
list_regions_full,
@@ -26,15 +32,24 @@ from web.db import (
rename_region,
rename_keyword,
change_region_color,
change_keyword_color
change_keyword_color,
subscribe_email,
unsubscribe_email,
list_email_subscriptions,
list_email_templates,
create_email_template,
update_email_template,
delete_email_template,
get_email_template,
)
from web.utils import (
initialize_users_from_settings,
filter_jobs,
get_job_by_id,
get_cache_dir,
now_iso,
)
from web.db import get_all_regions, get_all_keywords
from web.email_templates import render_job_alert_email
app = Flask(__name__)
app.secret_key = os.environ.get("FLASK_SECRET", "dev-secret-change-me")
@@ -105,24 +120,30 @@ def index():
# Apply user preference filters if no explicit filters provided
selected_region = request.args.get("region")
selected_keyword = request.args.get("keyword")
if not selected_region and session.get('username'):
user_negative_keywords = []
if session.get('username'):
try:
prefs = get_user_regions(session['username'])
if prefs:
# If user has region prefs, filter to them by default
all_jobs = [j for j in all_jobs if j.get(
'region') in set(prefs)]
username = session['username']
if not selected_region:
prefs = get_user_regions(username)
if prefs:
# If user has region prefs, filter to them by default
all_jobs = [j for j in all_jobs if j.get(
'region') in set(prefs)]
if not selected_keyword:
prefs = get_user_keywords(username)
if prefs:
all_jobs = [j for j in all_jobs if j.get(
'keyword') in set(prefs)]
# Always fetch negative keywords for logged-in users
user_negative_keywords = get_user_negative_keywords(username)
except Exception:
pass
if not selected_keyword and session.get('username'):
try:
prefs = get_user_keywords(session['username'])
if prefs:
all_jobs = [j for j in all_jobs if j.get(
'keyword') in set(prefs)]
except Exception:
pass
filtered_jobs = filter_jobs(all_jobs, selected_region, selected_keyword)
filtered_jobs = filter_jobs(
all_jobs, selected_region, selected_keyword, negative_keywords=user_negative_keywords)
return render_template(
"index.html",
@@ -176,23 +197,26 @@ def jobs():
# Respect user preferences when no explicit filters provided
region = request.args.get("region")
keyword = request.args.get("keyword")
if not region and session.get('username'):
user_negative_keywords = []
if session.get('username'):
try:
prefs = get_user_regions(session['username'])
if prefs:
all_jobs = [j for j in all_jobs if j.get(
'region') in set(prefs)]
username = session['username']
if not region:
prefs = get_user_regions(username)
if prefs:
all_jobs = [j for j in all_jobs if j.get(
'region') in set(prefs)]
if not keyword:
prefs = get_user_keywords(username)
if prefs:
all_jobs = [j for j in all_jobs if j.get(
'keyword') in set(prefs)]
user_negative_keywords = get_user_negative_keywords(username)
except Exception:
pass
if not keyword and session.get('username'):
try:
prefs = get_user_keywords(session['username'])
if prefs:
all_jobs = [j for j in all_jobs if j.get(
'keyword') in set(prefs)]
except Exception:
pass
return jsonify(filter_jobs(all_jobs, region, keyword))
return jsonify(filter_jobs(all_jobs, region, keyword, negative_keywords=user_negative_keywords))
@app.route('/job_details', methods=['GET'])
@@ -202,9 +226,9 @@ def job_details():
if session.get('username'):
try:
r = set(get_user_regions(session['username']))
k = set(get_user_keywords(session['username']))
if r:
jobs = [j for j in jobs if j.get('region') in r]
k = set(get_user_keywords(session['username']))
if k:
jobs = [j for j in jobs if j.get('keyword') in k]
except Exception:
@@ -230,39 +254,6 @@ def job_by_id(job_id):
return jsonify({"error": "Job not found"}), 404
@app.route('/cached/<job_id>', methods=['GET'])
def serve_cached(job_id):
"""Serve the cached HTML file for a job if available.
Uses the job record's `file_path_abs` when present, or resolves the DB `file_path` via helper.
Ensures the returned file is located under the configured cache directory to avoid path-traversal.
"""
try:
from web.db import db_get_cached_abs_path
j = get_job_by_id(job_id)
if not j:
return "Job not found", 404
# Prefer file_path_abs, fall back to resolving the DB-stored file_path
abs_fp = j.get('file_path_abs') or None
if not abs_fp:
db_fp = j.get('file_path')
abs_fp = db_get_cached_abs_path(db_fp) if db_fp else None
if not abs_fp or not os.path.isfile(abs_fp):
return "Cached file not available", 404
cache_dir = os.path.abspath(get_cache_dir())
abs_fp = os.path.abspath(abs_fp)
# Ensure the file is inside the cache directory
if os.path.commonpath([cache_dir, abs_fp]) != cache_dir:
return "Forbidden", 403
return send_file(abs_fp)
except Exception:
return "Error serving cached file", 500
@app.route('/jobs/<job_id>/favorite', methods=['POST'])
def set_favorite(job_id):
"""Mark or unmark a job as favorite for a given user.
@@ -288,9 +279,21 @@ csrf.exempt(set_favorite)
@app.route('/scrape', methods=['GET'])
def scrape():
"""Trigger the web scraping process."""
scraper()
return jsonify({"status": "Scraping completed"})
"""Trigger the web scraping process with streaming output."""
def generate():
try:
for message in scraper():
yield message
except Exception as e:
yield f"Error during scraping: {str(e)}\n"
return Response(generate(), mimetype='text/plain')
@app.route('/scrape-page', methods=['GET'])
def scrape_page():
"""Serve the scrape page with streaming output display."""
return render_template('scrape.html', title='Scrape Jobs')
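
Because /scrape now returns a streamed text/plain response, a client can print progress as it arrives; a minimal sketch with requests (the host and port are assumptions about a local dev setup):

import requests

# Read the streamed scraper output chunk by chunk instead of waiting for completion.
with requests.get("http://localhost:5000/scrape", stream=True) as resp:
    for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="")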
# ---------------- Auth & Admin UI ------------------------------------------
@@ -322,7 +325,7 @@ def admin_users():
if request.method == 'POST':
data = request.form
username = (data.get('username') or '').strip()
password = data.get('password') or None
password = data.get('new_password') or None
is_admin = bool(data.get('is_admin'))
is_active = bool(data.get('is_active')) if data.get(
'is_active') is not None else True
@@ -342,6 +345,163 @@ def admin_users():
return render_template('admin/users.html', users=users, title='Users')
@app.route('/admin/user/<user_id>', methods=['GET', 'POST'])
def admin_user(user_id):
if not require_admin():
return redirect(url_for('login'))
user = get_user_by_id(user_id)
if request.method == 'POST':
data = request.form
username = (data.get('username') or '').strip()
password = data.get('new_password')
is_admin = bool(data.get('is_admin'))
is_active = bool(data.get('is_active')) if data.get(
'is_active') is not None else True
try:
create_or_update_user(
username, password=password, is_admin=is_admin, is_active=is_active)
flash('User saved')
except Exception as e:
flash(f'Error: {e}')
return redirect(url_for('admin_users'))
return render_template('admin/user.html', user=user, title='User')
@app.route('/admin/user/<user_id>/delete', methods=['POST'])
def admin_user_delete(user_id):
if not require_admin():
return redirect(url_for('login'))
if delete_user_by_id(user_id):
flash('User deleted')
else:
flash('Error deleting user')
return redirect(url_for('admin_users'))
@app.route('/admin/emails', methods=['GET', 'POST'])
def admin_emails():
if not require_admin():
return redirect(url_for('login'))
if request.method == 'POST':
action = (request.form.get('action') or '').strip().lower()
email = (request.form.get('email') or '').strip()
try:
if action == 'subscribe':
subscribe_email(email)
flash('Subscription saved')
elif action == 'unsubscribe':
if unsubscribe_email(email):
flash('Subscription deactivated')
else:
flash('No matching subscription found')
elif action == 'reactivate':
subscribe_email(email)
flash('Subscription reactivated')
else:
flash('Unknown action')
except ValueError as exc:
flash(f'Error: {exc}')
except Exception as exc:
flash(f'Error: {exc}')
return redirect(url_for('admin_emails'))
subscriptions = list_email_subscriptions()
class Sub(dict):
__getattr__ = dict.get
subscription_rows = [Sub(s) for s in subscriptions]
active_count = sum(1 for s in subscription_rows if s.get('is_active'))
return render_template(
'admin/email.html',
title='Email Subscriptions',
subscriptions=subscription_rows,
total_active=active_count,
total=len(subscription_rows),
)
@app.route('/admin/email-templates', methods=['GET', 'POST'])
def admin_email_templates():
if not require_admin():
return redirect(url_for('login'))
if request.method == 'POST':
action = (request.form.get('action') or '').strip().lower()
template_id = request.form.get('template_id')
name = request.form.get('name') or ''
slug = request.form.get('slug') or ''
subject = request.form.get('subject') or ''
body = request.form.get('body') or ''
is_active = request.form.get('is_active') == 'on'
try:
if action == 'create':
create_email_template(
name=name, slug=slug, subject=subject, body=body, is_active=is_active)
flash('Template created')
elif action == 'update':
update_email_template(
int(template_id or 0),
name=name,
slug=slug or None,
subject=subject,
body=body,
is_active=is_active,
)
flash('Template updated')
elif action == 'delete':
if delete_email_template(int(template_id or 0)):
flash('Template deleted')
else:
flash('Template not found')
else:
flash('Unknown action')
except ValueError as exc:
flash(f'Error: {exc}')
except Exception as exc:
flash(f'Error: {exc}')
return redirect(url_for('admin_email_templates'))
templates = list_email_templates(include_inactive=True)
edit_id = request.args.get('template_id', type=int)
editing = get_email_template(edit_id) if edit_id else None
preview_payload = None
preview_template = None
preview_id = request.args.get('preview_id', type=int)
if preview_id:
preview_template = get_email_template(preview_id)
if preview_template:
sample_jobs = [
{
'title': 'Senior Python Engineer',
'company': 'ACME Corp',
'location': 'Remote',
'url': 'https://example.com/jobs/1',
},
{
'title': 'Data Engineer',
'company': 'Globex',
'location': 'New York, NY',
'url': 'https://example.com/jobs/2',
},
]
preview_payload = render_job_alert_email(
sample_jobs,
region='preview-region',
keyword='preview-keyword',
template_override=preview_template,
)
return render_template(
'admin/email_templates.html',
title='Email Templates',
templates=templates,
editing=editing,
preview=preview_payload,
preview_template=preview_template,
)
# ---------------- User settings (regions/keywords) -------------------------
@app.route('/settings', methods=['GET', 'POST'])
@@ -353,6 +513,8 @@ def user_settings():
# Accept JSON or form posts. Normalize singular/plural names.
sel_regions: list[str] = []
sel_keywords: list[str] = []
sel_negative_keywords: list[str] = []
if request.is_json:
data = request.get_json(silent=True) or {}
sel_regions = [
@@ -361,16 +523,25 @@ def user_settings():
sel_keywords = [
(v or '').strip() for v in (data.get('keywords') or []) if v and (v or '').strip()
]
sel_negative_keywords = [
(v or '').strip() for v in (data.get('negative_keywords') or []) if v and (v or '').strip()
]
else:
# HTML form fallback: support names 'regions' or 'region', 'keywords' or 'keyword'
r_vals = request.form.getlist(
'regions') + request.form.getlist('region')
k_vals = request.form.getlist(
'keywords') + request.form.getlist('keyword')
nk_vals = request.form.getlist(
'negative_keywords') + request.form.getlist('negative_keyword')
sel_regions = [(v or '').strip()
for v in r_vals if v and (v or '').strip()]
sel_keywords = [(v or '').strip()
for v in k_vals if v and (v or '').strip()]
sel_negative_keywords = [(v or '').strip()
for v in nk_vals if v and (v or '').strip()]
# Upsert any new values into master lists
for r in sel_regions:
try:
@@ -382,9 +553,14 @@ def user_settings():
upsert_keyword(k)
except Exception:
pass
# New negative keywords are upserted into the master list by set_user_negative_keywords itself (it calls upsert_negative_keyword).
try:
set_user_regions(username, sel_regions)
set_user_keywords(username, sel_keywords)
set_user_negative_keywords(username, sel_negative_keywords)
# For JSON callers, return 200 without redirect
if request.is_json:
return jsonify({"status": "ok"})
@@ -399,6 +575,8 @@ def user_settings():
all_keywords = get_all_keywords()
user_regions = get_user_regions(username)
user_keywords = get_user_keywords(username)
user_negative_keywords = get_user_negative_keywords(username)
return render_template(
'user/settings.html',
title='Your Preferences',
@@ -406,6 +584,7 @@ def user_settings():
all_keywords=all_keywords,
user_regions=user_regions,
user_keywords=user_keywords,
user_negative_keywords=user_negative_keywords,
)
@@ -474,6 +653,45 @@ def admin_taxonomy():
return render_template('admin/taxonomy.html', title='Taxonomy', regions=regions, keywords=keywords)
@app.route('/admin/stats', methods=['GET'])
def admin_stats():
if not require_admin():
return redirect(url_for('login'))
# Optional filters via query params
keyword = request.args.get('keyword')
region = request.args.get('region')
try:
stats = stats_overview()
# For detailed jobs table, reuse get_all_jobs() and filter
jobs = get_all_jobs()
if keyword:
jobs = [j for j in jobs if (j.get('keyword') or '') == keyword]
if region:
jobs = [j for j in jobs if (j.get('region') or '') == region]
except Exception as e:
flash(f'Error computing stats: {e}')
stats = {
'total_jobs': 0,
'total_keywords': 0,
'total_regions': 0,
'jobs_per_keyword': [],
'jobs_per_region': []
}
jobs = []
return render_template('admin/stats.html', title='Statistics', stats=stats, jobs=jobs, regions=get_all_regions(), keywords=get_all_keywords())
@app.route('/health', methods=['GET'])
def health_check():
"""Health check endpoint for monitoring application status."""
return jsonify({
"status": "healthy",
"timestamp": now_iso(),
"service": "jobs-scraper",
"version": "1.0.0"
}), 200
def init():
"""Main function to run the Flask app."""
# Ensure DB is initialized


@@ -2,42 +2,83 @@ from datetime import datetime, timezone
from web.scraper import process_region_keyword, scrape_job_page
from web.db import (
db_init,
upsert_cached_page,
upsert_listing,
upsert_job_details,
url_to_job_id,
upsert_user_interaction,
db_remove_cached_url,
db_sync_cached_pages,
db_get_all_job_urls,
db_get_cache_url,
db_delete_job,
remove_job,
normalize_cached_page_paths,
insert_log,
get_last_fetch_time,
)
import schedule
import time
# Import utility functions
from web.utils import (
get_cache_dir,
get_base_url,
make_request_with_retry,
now_iso,
get_cache_path,
cache_page,
is_cache_stale,
delete_cached_page,
get_cached_content,
ensure_cache_dir
get_email_settings,
)
from web.db import get_all_regions, get_all_keywords, seed_regions_keywords_from_listings
from web.email_templates import render_job_alert_email
from web.email_service import send_email
def _negative_match_details(job_data: dict) -> tuple[str, str] | None:
"""Return (keyword, field) when job_data indicates a negative match."""
if not job_data or not job_data.get("is_negative_match"):
return None
keyword = (job_data.get("negative_keyword_match") or "").strip()
field = (job_data.get("negative_match_field")
or "unknown").strip() or "unknown"
if not keyword:
keyword = "unknown keyword"
return keyword, field
def _send_new_job_alert(new_jobs: list[dict]) -> tuple[bool, str]:
"""Send an email alert for newly discovered jobs.
Returns (sent, message) where message explains why mail was skipped.
"""
settings = get_email_settings()
if not settings.get("enabled"):
return False, "email alerts disabled"
recipients = settings.get("recipients", []) or []
if not recipients:
return False, "no recipients configured"
payload = render_job_alert_email(new_jobs)
send_email(
subject=payload.get("subject", "New jobs available"),
body=payload.get("body", ""),
to=recipients,
settings=settings,
)
return True, "sent"
def fetch_listings():
"""Fetch job listings from all regions and keywords."""
"""Fetch job listings from all regions and keywords.
Yields progress messages and returns a dict with:
- discovered: total number of unique job URLs discovered
- new: total number of new jobs added to the database
- by_search: list of dicts, each containing:
- region: region name
- keyword: keyword name
- count: number of jobs fetched for this search
"""
# We'll collect URLs discovered in this run and then remove any DB listings
# not present in this set (treat DB as reflecting current search results).
existing_db_urls = set(db_get_all_job_urls())
existing_db_urls = set(row['url'] for row in db_get_all_job_urls())
discovered_urls = set()
new_rows = []
new_jobs = []
search_results = [] # Track count per search
# Ensure regions/keywords master lists exist
try:
@@ -45,20 +86,64 @@ def fetch_listings():
except Exception:
pass
yield "Initializing database and seeding regions/keywords...\n"
# Fetch listings for each region/keyword from DB
for region in get_all_regions():
regions = get_all_regions()
keywords = get_all_keywords()
total_combinations = len(regions) * len(keywords)
processed = 0
yield f"Found {len(regions)} regions and {len(keywords)} keywords. Processing {total_combinations} combinations...\n"
for region in regions:
region_name = region.get("name")
if not region_name:
continue
for keyword in get_all_keywords():
for keyword in keywords:
keyword_name = keyword.get("name")
if not keyword_name:
continue
# Build a canonical search identifier for this region+keyword combination.
url = get_base_url().format(region=region_name, keyword=keyword_name.replace(" ", "+"))
search_page_id = f"search:{region_name}:{keyword_name}"
search_count = 0 # Count jobs for this search
try:
last = get_last_fetch_time(url)
if last is not None:
# skip if fetched within the last hour
age = datetime.now(
timezone.utc) - (last if last.tzinfo is not None else last.replace(tzinfo=timezone.utc))
if age.total_seconds() < 1 * 3600:
yield f"Skipping {region_name} + {keyword_name} (fetched {age.seconds//3600}h ago)...\n"
processed += 1
continue
except Exception:
# if logging lookup fails, proceed with fetch
pass
processed += 1
yield f"Processing {region_name} + {keyword_name} ({processed}/{total_combinations})...\n"
# record that we're fetching this search page now
try:
insert_log(url, region=region_name,
keyword=keyword_name, fetched_at=datetime.now(timezone.utc))
except Exception:
pass
for row in process_region_keyword(region_name, keyword_name, discovered_urls):
timestamp, region, keyword, title, pay, location, url = row
discovered_urls.add(url)
search_count += 1
if url not in existing_db_urls:
new_rows.append(row)
new_jobs.append({
"timestamp": timestamp,
"region": region,
"keyword": keyword,
"title": title,
"pay": pay,
"location": location,
"url": url,
})
# Upsert or update listing to reflect current search result
upsert_listing(
url=url,
@@ -68,80 +153,154 @@ def fetch_listings():
pay=pay,
location=location,
timestamp=timestamp,
fetched_from=search_page_id,
fetched_at=datetime.now(timezone.utc),
)
# Record per-search count
search_results.append({
"region": region_name,
"keyword": keyword_name,
"count": search_count
})
# Remove stale listings: those present in DB but not discovered now.
stale_urls = existing_db_urls - discovered_urls
for url in stale_urls:
try:
jid = url_to_job_id(url)
db_delete_job(jid)
# Also try to remove cached file and its metadata
delete_cached_page(url)
db_remove_cached_url(url)
except Exception:
pass
return {"discovered": len(discovered_urls), "new": len(new_rows), "stale": len(stale_urls)}
yield f"Listing fetch complete: {len(discovered_urls)} discovered, {len(new_rows)} new,\n"
return {
"discovered": len(discovered_urls),
"new": len(new_rows),
"by_search": search_results,
"new_jobs": new_jobs,
}
def process_job_url(job_url: str):
def process_job_url(job_url: str, region: str = "", keyword: str = ""):
last = get_last_fetch_time(job_url)
if last is not None:
# skip if fetched within the last 24 hours
age = datetime.now(
timezone.utc) - (last if last.tzinfo is not None else last.replace(tzinfo=timezone.utc))
if age.total_seconds() < 24 * 3600:
yield f"Skipping job {job_url} (fetched {age.seconds//3600}h ago)...\n"
return None
try:
job_id = url_to_job_id(job_url)
content = None
cached_page = db_get_cache_url(job_url)
if cached_page:
last_modified = cached_page.get("last_modified")
if last_modified and not is_cache_stale(last_modified):
content = get_cached_content(job_url)
else:
content = make_request_with_retry(job_url, 1)
else:
content = make_request_with_retry(job_url, 1)
yield f"Fetching job page: {job_url}\n"
content = make_request_with_retry(job_url, 1)
if content is None:
yield f"Failed to fetch content for {job_url}, removing from database\n"
remove_job(job_url)
return None
# refresh cache and details
cache_page(job_url, content)
upsert_cached_page(
file_path=get_cache_path(job_url),
url_guess=job_url,
last_modified=now_iso(),
size_bytes=len(content),
job_id=job_id
)
yield f"Scraping job data from {job_url}\n"
job_data = scrape_job_page(content, job_url)
if job_data:
upsert_job_details(job_data)
upsert_user_interaction(
job_id, seen_at=datetime.now(timezone.utc).isoformat())
negative_info = _negative_match_details(job_data)
if negative_info:
keyword, field = negative_info
yield (
f"Skipping job {job_id} due to negative keyword "
f"'{keyword}' in {field}\n"
)
remove_job(job_url)
return None
yield f"Upserting job details for {job_id}\n"
upsert_job_details(job_data, region=region, keyword=keyword)
yield f"Successfully processed job {job_id}: {job_data.get('title', 'Unknown')}\n"
return job_data
else:
yield f"Failed to scrape job data from {job_url}\n"
return None
except Exception:
except Exception as e:
yield f"Error processing {job_url}: {str(e)}\n"
return None
def scraper():
"""Main function to run the scraper."""
ensure_cache_dir()
yield "Starting scraper...\n"
db_init()
yield "Database initialized\n"
# First, fetch current listings from search pages and make DB reflect them.
jl = fetch_listings()
yield "Fetching listings...\n"
listing_summary: dict | None = None
fetch_iter = fetch_listings()
try:
while True:
message = next(fetch_iter)
yield message
except StopIteration as stop:
listing_summary = stop.value if isinstance(stop.value, dict) else {}
# Sync any cached files we have on disk into the cached_pages table.
db_sync_cached_pages(get_cache_dir())
new_jobs = []
if listing_summary:
new_jobs = listing_summary.get("new_jobs", []) or []
# Normalize any relative cached file paths to absolute paths in DB
normalize_cached_page_paths()
if new_jobs:
yield f"Preparing email alert for {len(new_jobs)} new jobs...\n"
try:
sent, info = _send_new_job_alert(new_jobs)
if sent:
yield "Job alert email sent.\n"
else:
yield f"Skipping email alert: {info}\n"
except Exception as exc:
yield f"Failed to send job alert email: {exc}\n"
# Finally, fetch and refresh individual job pages for current listings
for url in db_get_all_job_urls():
process_job_url(url)
job_urls = db_get_all_job_urls()
yield f"Processing {len(job_urls)} job pages...\n"
for i, url_dict in enumerate(job_urls, start=1):
url = url_dict.get("url")
region = url_dict.get("region", "")
keyword = url_dict.get("keyword", "")
if not url:
continue
yield f"\n--- Processing job {i}/{len(job_urls)} ---\n"
for message in process_job_url(job_url=url, region=region, keyword=keyword):
yield message
yield "\nScraping completed successfully!\n"
def scrape_jobs_with_retry(max_retries=3):
"""Run the scraping process with retry logic for failures."""
for attempt in range(max_retries):
try:
for _ in scraper():  # scraper() is a generator; drain it so the scrape actually runs
    pass
return True
except Exception as e:
if attempt < max_retries - 1:
time.sleep(2 ** attempt * 10)  # Exponential backoff: 10s, 20s, 40s, ...
return False
def start_scheduler():
"""Start the scheduler to run scraping every hour."""
# Clear any existing jobs
schedule.clear()
# Schedule scraping every hour
schedule.every().hour.do(scrape_jobs_with_retry)
# Run the scheduler in a loop
while True:
schedule.run_pending()
time.sleep(60) # Check every minute
def run_scheduled_scraping():
"""Run the scheduled scraping process."""
try:
scrape_jobs_with_retry()
except Exception as e:
pass
# Initialize scheduler when module is imported
schedule.every().hour.do(run_scheduled_scraping)
if __name__ == "__main__":
for message in scraper():
    print(message, end="")
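
Since start_scheduler() blocks in its polling loop, a process that also serves web requests would typically run it on a daemon thread; a sketch of that setup (the threading wiring is an assumption, the module itself does not do this):

import threading
from web.craigslist import start_scheduler

scheduler_thread = threading.Thread(target=start_scheduler, daemon=True)
scheduler_thread.start()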

761
web/db.py

@@ -4,27 +4,24 @@ from __future__ import annotations
Tables:
- users(user_id PK, username UNIQUE, created_at)
- cached_pages(file_path PK, url_guess, last_modified, size_bytes, job_id)
- job_listings(job_id PK, url UNIQUE, region, keyword, title, pay, location, timestamp)
- job_descriptions(job_id PK FK -> job_listings, title, company, location, description, posted_time, url)
- job_descriptions(job_id PK FK -> job_listings, title, company, location, description, posted_time, url, reply_url)
- user_interactions(job_id PK FK -> job_listings, user_id FK -> users, seen_at, url_visited, is_user_favorite)
- regions(region_id PK, name UNIQUE)
- keywords(keyword_id PK, name UNIQUE)
- user_regions(user_id FK -> users, region_id FK -> regions, composite PK)
- user_keywords(user_id FK -> users, keyword_id FK -> keywords, composite PK)
- logs(id PK, page_url, region, keyword, fetched_at)
"""
from datetime import datetime, UTC
import os
from typing import Optional, Dict, Any, List
import re
from web.utils import (
get_url_from_filename,
get_color_from_string,
url_to_job_id,
normalize_job_id,
now_iso,
get_cache_path,
get_cache_dir,
get_mysql_config,
)
@@ -86,8 +83,6 @@ class JobListing(Base):
description = relationship(
"JobDescription", back_populates="listing", uselist=False, cascade="all, delete-orphan")
cached_pages = relationship(
"CachedPage", back_populates="listing", cascade="all, delete-orphan")
interactions = relationship(
"UserInteraction", back_populates="listing", cascade="all, delete-orphan")
@@ -102,32 +97,277 @@ class JobDescription(Base):
description = Column(Text)
posted_time = Column(String(TIME_LEN))
url = Column(String(URL_LEN))
reply_url = Column(String(URL_LEN))
contact_email = Column(String(SHORT_LEN))
contact_phone = Column(String(SHORT_LEN))
contact_name = Column(String(SHORT_LEN))
listing = relationship("JobListing", back_populates="description")
class CachedPage(Base):
__tablename__ = "cached_pages"
file_path = Column(String(FILE_PATH_LEN), primary_key=True)
url_guess = Column(String(URL_LEN))
last_modified = Column(String(TIME_LEN))
size_bytes = Column(Integer)
job_id = Column(String(JOB_ID_LEN), ForeignKey(
"job_listings.job_id", ondelete="CASCADE"))
def _normalize_email(value: Optional[str]) -> str:
if not value or not isinstance(value, str):
return ""
return value.strip().lower()
listing = relationship("JobListing", back_populates="cached_pages")
@property
def abs_path(self) -> Optional[str]:
"""Return the absolute filesystem path for this cached page.
def subscribe_email(email: str) -> bool:
"""Add or reactivate an email subscription."""
address = _normalize_email(email)
if not address:
raise ValueError("email address required")
with _ensure_session() as session:
existing = session.execute(
text(
"SELECT subscription_id, is_active FROM email_subscriptions WHERE email = :e"
),
{"e": address},
).fetchone()
now = datetime.now(UTC)
if existing:
session.execute(
text(
"UPDATE email_subscriptions SET is_active = 1, updated_at = :u WHERE subscription_id = :sid"
),
{"u": now, "sid": existing[0]},
)
else:
session.execute(
text(
"INSERT INTO email_subscriptions(email, is_active, created_at, updated_at) "
"VALUES(:e, 1, :u, :u)"
),
{"e": address, "u": now},
)
session.commit()
return True
The DB stores `file_path` relative to the configured cache dir. This
helper centralizes resolution so callers can use `cached_page.abs_path`.
"""
fp = getattr(self, 'file_path', None)
if not fp:
return None
return os.path.join(os.path.abspath(get_cache_dir()), fp)
def unsubscribe_email(email: str) -> bool:
"""Deactivate an email subscription."""
address = _normalize_email(email)
if not address:
raise ValueError("email address required")
with _ensure_session() as session:
now = datetime.now(UTC)
result = session.execute(
text(
"UPDATE email_subscriptions SET is_active = 0, updated_at = :u WHERE email = :e"
),
{"u": now, "e": address},
)
session.commit()
rowcount = getattr(result, "rowcount", None)
if rowcount is None:
return False
return rowcount > 0
def list_email_subscriptions(*, active_only: bool = False) -> List[Dict[str, Any]]:
"""Return subscription rows as dicts."""
query = "SELECT subscription_id, email, is_active, created_at, updated_at FROM email_subscriptions"
params: Dict[str, Any] = {}
if active_only:
query += " WHERE is_active = 1"
query += " ORDER BY email"
with _ensure_session() as session:
rows = session.execute(text(query), params).fetchall()
result: List[Dict[str, Any]] = []
for row in rows:
result.append(
{
"subscription_id": row[0],
"email": row[1],
"is_active": bool(row[2]),
"created_at": row[3],
"updated_at": row[4],
}
)
return result
def get_active_email_recipients() -> List[str]:
"""Return list of active subscription email addresses."""
return [s["email"] for s in list_email_subscriptions(active_only=True)]
def _normalize_slug(value: Optional[str]) -> str:
if not value:
return ""
slug = re.sub(r"[^a-zA-Z0-9-]+", "-", value.strip().lower())
slug = re.sub(r"-+", "-", slug).strip("-")
return slug
def _template_to_dict(template: EmailTemplate) -> Dict[str, Any]:
created = getattr(template, "created_at", None)
updated = getattr(template, "updated_at", None)
return {
"template_id": template.template_id,
"slug": template.slug,
"name": template.name,
"subject": template.subject,
"body": template.body,
"is_active": bool(template.is_active),
"created_at": created.isoformat() if isinstance(created, datetime) else created,
"updated_at": updated.isoformat() if isinstance(updated, datetime) else updated,
}
def list_email_templates(*, include_inactive: bool = True) -> List[Dict[str, Any]]:
with _ensure_session() as session:
query = session.query(EmailTemplate)
if not include_inactive:
query = query.filter(EmailTemplate.is_active.is_(True))
items = query.order_by(EmailTemplate.name.asc()).all()
return [_template_to_dict(obj) for obj in items]
def get_email_template(template_id: int) -> Optional[Dict[str, Any]]:
if not template_id:
return None
with _ensure_session() as session:
obj = session.get(EmailTemplate, int(template_id))
return _template_to_dict(obj) if obj else None
def get_email_template_by_slug(slug: str) -> Optional[Dict[str, Any]]:
normalized = _normalize_slug(slug)
if not normalized:
return None
with _ensure_session() as session:
obj = session.query(EmailTemplate).filter(
EmailTemplate.slug == normalized).one_or_none()
return _template_to_dict(obj) if obj else None
def create_email_template(
*,
name: str,
subject: str,
body: str,
slug: Optional[str] = None,
is_active: bool = True,
) -> Dict[str, Any]:
name_clean = (name or "").strip()
if not name_clean:
raise ValueError("Template name is required")
subject_clean = (subject or "").strip()
if not subject_clean:
raise ValueError("Template subject is required")
body_clean = (body or "").strip()
if not body_clean:
raise ValueError("Template body is required")
slug_clean = _normalize_slug(slug or name_clean)
if not slug_clean:
raise ValueError("Template slug is required")
with _ensure_session() as session:
existing = session.query(EmailTemplate).filter(
EmailTemplate.slug == slug_clean).one_or_none()
if existing:
raise ValueError("A template with this slug already exists")
template = EmailTemplate(
name=name_clean,
slug=slug_clean,
subject=subject_clean,
body=body_clean,
is_active=bool(is_active),
)
session.add(template)
session.commit()
session.refresh(template)
return _template_to_dict(template)
def update_email_template(
template_id: int,
*,
name: Optional[str] = None,
subject: Optional[str] = None,
body: Optional[str] = None,
slug: Optional[str] = None,
is_active: Optional[bool] = None,
) -> Dict[str, Any]:
if not template_id:
raise ValueError("template_id is required")
with _ensure_session() as session:
template = session.get(EmailTemplate, int(template_id))
if template is None:
raise ValueError("Template not found")
if name is not None:
name_clean = name.strip()
if not name_clean:
raise ValueError("Template name is required")
setattr(template, "name", name_clean)
if subject is not None:
subject_clean = subject.strip()
if not subject_clean:
raise ValueError("Template subject is required")
setattr(template, "subject", subject_clean)
if body is not None:
body_clean = body.strip()
if not body_clean:
raise ValueError("Template body is required")
setattr(template, "body", body_clean)
if slug is not None:
slug_clean = _normalize_slug(slug)
if not slug_clean:
raise ValueError("Template slug is required")
existing = (
session.query(EmailTemplate)
.filter(EmailTemplate.slug == slug_clean, EmailTemplate.template_id != template.template_id)
.one_or_none()
)
if existing:
raise ValueError("A template with this slug already exists")
setattr(template, "slug", slug_clean)
if is_active is not None:
setattr(template, "is_active", bool(is_active))
template.touch()
session.commit()
session.refresh(template)
return _template_to_dict(template)
def delete_email_template(template_id: int) -> bool:
if not template_id:
return False
with _ensure_session() as session:
template = session.get(EmailTemplate, int(template_id))
if template is None:
return False
session.delete(template)
session.commit()
return True
def ensure_default_email_template() -> None:
try:
from web.email_templates import DEFAULT_JOB_ALERT_SUBJECT, DEFAULT_JOB_ALERT_BODY
except Exception:
DEFAULT_JOB_ALERT_SUBJECT = "{count_label}{scope}"
DEFAULT_JOB_ALERT_BODY = (
"Hi,\n\n{intro_line}\n{jobs_message}\n\nGenerated at {timestamp} UTC.\n"
"You are receiving this message because job alerts are enabled.\n"
)
try:
with _ensure_session() as session:
existing = session.query(EmailTemplate).filter(
EmailTemplate.slug == "job-alert").one_or_none()
if existing is None:
template = EmailTemplate(
name="Job Alert",
slug="job-alert",
subject=DEFAULT_JOB_ALERT_SUBJECT,
body=DEFAULT_JOB_ALERT_BODY,
is_active=True,
)
session.add(template)
session.commit()
except Exception:
pass
class UserInteraction(Base):
@@ -176,6 +416,58 @@ class UserKeyword(Base):
"keywords.keyword_id", ondelete="CASCADE"), primary_key=True)
class NegativeKeyword(Base):
__tablename__ = "negative_keywords"
keyword_id = Column(Integer, primary_key=True, autoincrement=True)
name = Column(String(SHORT_LEN), unique=True, nullable=False)
class UserNegativeKeyword(Base):
__tablename__ = "user_negative_keywords"
user_id = Column(Integer, ForeignKey(
"users.user_id", ondelete="CASCADE"), primary_key=True)
keyword_id = Column(Integer, ForeignKey(
"negative_keywords.keyword_id", ondelete="CASCADE"), primary_key=True)
class Log(Base):
__tablename__ = "logs"
id = Column(Integer, primary_key=True, autoincrement=True)
page_url = Column(String(URL_LEN))
region = Column(String(SHORT_LEN))
keyword = Column(String(SHORT_LEN))
fetched_at = Column(DateTime)
class EmailSubscription(Base):
__tablename__ = "email_subscriptions"
subscription_id = Column(Integer, primary_key=True, autoincrement=True)
email = Column(String(SHORT_LEN), unique=True, nullable=False)
is_active = Column(Boolean, default=True, nullable=False)
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
updated_at = Column(DateTime, default=datetime.utcnow, nullable=False)
def touch(self):
setattr(self, "updated_at", datetime.utcnow())
class EmailTemplate(Base):
__tablename__ = "email_templates"
template_id = Column(Integer, primary_key=True, autoincrement=True)
slug = Column(String(SHORT_LEN), unique=True, nullable=False)
name = Column(String(SHORT_LEN), nullable=False)
subject = Column(Text, nullable=False)
body = Column(Text, nullable=False)
is_active = Column(Boolean, default=True, nullable=False)
created_at = Column(
DateTime, default=lambda: datetime.now(UTC), nullable=False)
updated_at = Column(
DateTime, default=lambda: datetime.now(UTC), nullable=False)
def touch(self):
setattr(self, "updated_at", datetime.now(UTC))
def _ensure_session() -> Session:
global engine, SessionLocal
if engine is None or SessionLocal is None:
@@ -223,6 +515,31 @@ def db_init():
text("ALTER TABLE users ADD COLUMN IF NOT EXISTS last_login DATETIME NULL"))
except Exception:
pass
try:
conn.execute(text(
"ALTER TABLE job_descriptions ADD COLUMN IF NOT EXISTS reply_url VARCHAR(512) NULL"))
except Exception:
pass
try:
conn.execute(text(
"ALTER TABLE job_descriptions ADD COLUMN IF NOT EXISTS contact_email VARCHAR(255) NULL"))
except Exception:
pass
try:
conn.execute(text(
"ALTER TABLE job_descriptions ADD COLUMN IF NOT EXISTS contact_phone VARCHAR(255) NULL"))
except Exception:
pass
try:
conn.execute(text(
"ALTER TABLE job_descriptions ADD COLUMN IF NOT EXISTS contact_name VARCHAR(255) NULL"))
except Exception:
pass
try:
ensure_default_email_template()
except Exception:
pass
def upsert_user_interaction(job_id: str | int, *, user_id: Optional[int] = None, seen_at: Optional[str] = None, url_visited: Optional[str] = None, is_user_favorite: Optional[bool] = None):
@@ -247,7 +564,7 @@ def upsert_user_interaction(job_id: str | int, *, user_id: Optional[int] = None,
session.commit()
def upsert_listing(*, url: str, region: str, keyword: str, title: str, pay: str, location: str, timestamp: str):
def upsert_listing(*, url: str, region: str, keyword: str, title: str, pay: str, location: str, timestamp: str, fetched_from: str | None = None, fetched_at: Optional[datetime] = None):
"""Insert or update a job listing row based on job_id derived from URL."""
job_id = str(url_to_job_id(url))
with _ensure_session() as session:
@@ -263,19 +580,74 @@ def upsert_listing(*, url: str, region: str, keyword: str, title: str, pay: str,
setattr(obj, "location", location)
setattr(obj, "timestamp", timestamp)
session.commit()
# Optionally record a fetch log for the listing if source provided
if fetched_from:
try:
insert_log(fetched_from, region=region, keyword=keyword,
fetched_at=fetched_at or datetime.now())
except Exception:
pass
def upsert_job_details(job_data: Dict[str, Any]):
"""Upsert into job_descriptions table using scraped job details dict."""
def insert_log(page_url: str, region: str | None = None, keyword: str | None = None, fetched_at: Optional[datetime] = None):
"""Insert a log row for a fetched page."""
fetched_at = fetched_at or datetime.now()
with _ensure_session() as session:
l = Log(page_url=page_url, region=region or '',
keyword=keyword or '', fetched_at=fetched_at)
session.add(l)
session.commit()
def get_last_fetch_time(page_url: str) -> Optional[datetime]:
"""Return the latest fetched_at for a given page_url, or None if never fetched."""
with _ensure_session() as session:
row = session.execute(text("SELECT fetched_at FROM logs WHERE page_url = :u ORDER BY fetched_at DESC LIMIT 1"), {
"u": page_url}).fetchone()
if row and row[0]:
return row[0]
return None
def upsert_job_details(job_data: Dict[str, Any], region: str = "", keyword: str = ""):
"""Upsert into job_descriptions table using scraped job details dict.
Behavior additions:
- If the provided job `url` already has a log entry recorded within the last 24 hours,
the update is skipped to avoid unnecessary work.
- On successful upsert, a log entry is recorded with `insert_log(url, ...)`.
"""
if not job_data or job_data.get("is_negative_match"):
return
url = job_data.get("url")
job_id = normalize_job_id(job_data.get("id"), url)
if not job_id:
return
# Skip if job page was fetched recently (24 hours)
try:
if isinstance(url, str) and url:
last = get_last_fetch_time(url)
if last is not None:
# normalize tz-awareness
from datetime import timezone as _tz
now = datetime.now(_tz.utc)
last_dt = last if getattr(
last, 'tzinfo', None) is not None else last.replace(tzinfo=_tz.utc)
if (now - last_dt).total_seconds() < 24 * 3600:
return
except Exception:
# if log lookup fails, proceed normally
pass
title = job_data.get("title") or None
company = job_data.get("company") or None
location = job_data.get("location") or None
description = job_data.get("description") or None
posted_time = job_data.get("posted_time") or None
reply_url = job_data.get("reply_url") or None
contact_email = job_data.get("contact_email") or None
contact_phone = job_data.get("contact_phone") or None
contact_name = job_data.get("contact_name") or None
job_id = str(job_id)
with _ensure_session() as session:
@@ -289,142 +661,20 @@ def upsert_job_details(job_data: Dict[str, Any]):
setattr(obj, "description", description)
setattr(obj, "posted_time", posted_time)
setattr(obj, "url", url)
setattr(obj, "reply_url", reply_url)
setattr(obj, "contact_email", contact_email)
setattr(obj, "contact_phone", contact_phone)
setattr(obj, "contact_name", contact_name)
session.commit()
def upsert_cached_page(*, file_path: str, url_guess: Optional[str], last_modified: Optional[str], size_bytes: Optional[int], job_id: Optional[int]):
# Store file paths relative to the cache directory (keeps DB portable)
cache_dir = os.path.abspath(get_cache_dir())
abs_fp = os.path.abspath(file_path)
rel_fp = os.path.relpath(abs_fp, start=cache_dir)
with _ensure_session() as session:
obj = session.get(CachedPage, rel_fp)
if obj is None:
obj = CachedPage(file_path=rel_fp)
session.add(obj)
setattr(obj, "url_guess", url_guess)
setattr(obj, "last_modified", last_modified)
setattr(obj, "size_bytes", size_bytes)
setattr(obj, "job_id", str(job_id) if job_id else None)
session.commit()
def remove_cached_page(file_path: str):
# Accept absolute or relative input but DB keys are stored relative to cache dir
cache_dir = os.path.abspath(get_cache_dir())
abs_fp = os.path.abspath(file_path)
rel_fp = os.path.relpath(abs_fp, start=cache_dir)
with _ensure_session() as session:
obj = session.get(CachedPage, rel_fp)
if obj:
session.delete(obj)
session.commit()
def db_remove_cached_url(url: str):
"""Remove a cached page by URL."""
# Compute absolute path for the URL and delegate to remove_cached_page
abs_fp = os.path.abspath(get_cache_path(url))
# Record that we fetched/updated this job page
try:
remove_cached_page(abs_fp)
if isinstance(url, str) and url:
insert_log(url, region=None, keyword=None,
fetched_at=datetime.now())
except Exception:
pass
def db_get_all_cached_pages() -> List[Dict[str, Any]]:
with _ensure_session() as session:
rows = session.execute(text(
"SELECT file_path, url_guess, last_modified, size_bytes, job_id FROM cached_pages")).fetchall()
out = []
for row in rows:
fp = row[0]
out.append({
"file_path": fp,
"file_path_abs": db_get_cached_abs_path(fp) if fp else None,
"url_guess": row[1],
"last_modified": row[2],
"size_bytes": row[3],
"job_id": row[4],
})
return out
def db_get_cache_url(url: str):
"""Return the data for a specific URL from cached_pages.
Arguments:
url -- The URL to look up in the cache.
"""
with _ensure_session() as session:
row = session.execute(text(
"SELECT file_path, url_guess, last_modified, size_bytes, job_id FROM cached_pages WHERE url_guess = :u"), {"u": url}).fetchone()
if not row:
return None
fp = row[0]
return {
"file_path": fp,
"file_path_abs": db_get_cached_abs_path(fp) if fp else None,
"url_guess": row[1],
"last_modified": row[2],
"size_bytes": row[3],
"job_id": row[4],
}
def db_sync_cached_pages(cache_dir: str):
"""Scan cache_dir and upsert page metadata into cached_pages table."""
if not os.path.isdir(cache_dir):
return
abs_cache = os.path.abspath(cache_dir)
# read existing DB keys once for quick membership tests
db_cache_paths = {c["file_path"] for c in db_get_all_cached_pages()}
for root, _, files in os.walk(abs_cache):
for name in files:
if not name.lower().endswith(".html"):
continue
fp = os.path.abspath(os.path.join(root, name))
rel_fp = os.path.relpath(fp, start=abs_cache)
if rel_fp in db_cache_paths:
continue
try:
stat = os.stat(fp)
mtime = datetime.fromtimestamp(stat.st_mtime).isoformat()
size = stat.st_size
except OSError:
mtime = None
size = None
url_guess = get_url_from_filename(name)
job_id = url_to_job_id(url_guess)
upsert_cached_page(file_path=rel_fp, url_guess=url_guess,
last_modified=mtime, size_bytes=size, job_id=job_id)
def normalize_cached_page_paths() -> int:
"""Ensure all cached_pages.file_path values are absolute. Returns number of rows updated/normalized."""
# Convert any absolute paths in DB to relative paths (relative to cache dir)
changed = 0
abs_cache = os.path.abspath(get_cache_dir())
with _ensure_session() as session:
rows = session.execute(text(
"SELECT file_path, url_guess, last_modified, size_bytes, job_id FROM cached_pages")).fetchall()
for (fp, url_guess, last_modified, size_bytes, job_id) in rows:
if os.path.isabs(fp):
rel_fp = os.path.relpath(fp, start=abs_cache)
upsert_cached_page(
file_path=rel_fp,
url_guess=url_guess,
last_modified=last_modified,
size_bytes=size_bytes,
job_id=job_id,
)
with _ensure_session() as session:
session.execute(
text("DELETE FROM cached_pages WHERE file_path = :fp"), {"fp": fp})
session.commit()
changed += 1
return changed
def db_get_keywords() -> List[str]:
"""Return a list of all unique keywords from job listings."""
with _ensure_session() as session:
@@ -453,15 +703,10 @@ SELECT l.job_id
,l.timestamp
,d.posted_time
,l.url
,c.file_path
,c.last_modified
,c.url_guess
,CASE WHEN c.url_guess != l.url THEN 1 ELSE 0 END AS url_guess_stale
FROM job_listings AS l
INNER JOIN job_descriptions AS d
INNER JOIN job_descriptions AS d
ON l.job_id = d.job_id
AND l.url = d.url
LEFT JOIN cached_pages AS c ON l.job_id = c.job_id
ORDER BY d.posted_time DESC
"""
with _ensure_session() as session:
@@ -479,32 +724,21 @@ ORDER BY d.posted_time DESC
"timestamp": row[7],
"posted_time": row[8],
"url": row[9],
# file_path is stored relative to cache dir; provide both forms
"file_path": row[10],
"file_path_abs": os.path.join(os.path.abspath(get_cache_dir()), row[10]) if row[10] else None,
"last_modified": row[11],
"url_guess": row[12],
"url_guess_stale": row[13],
}
jobs.append(job)
return jobs
def db_get_cached_abs_path(db_file_path: Optional[str]) -> Optional[str]:
"""Return absolute cache file path given a DB-stored (relative) file_path.
def db_get_all_job_urls() -> List[dict]:
"""Return list of job URLs from job_listings.
Returns None if input is falsy.
Returns:
- List of dicts with keys: url, region, keyword
"""
if not db_file_path:
return None
return os.path.join(os.path.abspath(get_cache_dir()), db_file_path)
def db_get_all_job_urls() -> List[str]:
"""Return list of job URLs from job_listings."""
with _ensure_session() as session:
rows = session.execute(text("SELECT url FROM job_listings")).fetchall()
return [r[0] for r in rows]
rows = session.execute(
text("SELECT url, region, keyword FROM job_listings")).fetchall()
return [{"url": r[0], "region": r[1], "keyword": r[2]} for r in rows]
def db_delete_job(job_id: str | int):
@@ -522,10 +756,6 @@ def remove_job(url):
try:
jid = url_to_job_id(url)
db_delete_job(jid)
cache_fp = get_cache_path(url)
remove_cached_page(os.path.abspath(cache_fp))
if os.path.exists(cache_fp):
os.remove(cache_fp)
except Exception:
pass
@@ -534,7 +764,8 @@ def remove_job(url):
def get_or_create_user(username: str) -> int:
"""Return user_id for username, creating if missing."""
created_at = datetime.now(UTC).isoformat()
# Drop microseconds and offset: store '2025-08-30T16:04:29', not '2025-08-30T16:04:29.660245+00:00'
created_at = datetime.now(UTC).isoformat().split('.')[0]
with _ensure_session() as session:
row = session.execute(
text("SELECT user_id FROM users WHERE username = :u"), {
@@ -654,22 +885,54 @@ def get_user(username: str) -> Optional[Dict[str, Any]]:
"""Return single user dict or None."""
with _ensure_session() as session:
row = session.execute(text(
"SELECT user_id, username, is_admin, is_active, password_hash, last_login, created_at FROM users WHERE username = :u"
"SELECT user_id, username, created_at, is_admin, is_active, last_login, (password_hash IS NOT NULL) AS has_pw FROM users WHERE username = :u"
), {"u": username}).fetchone()
if not row:
return None
return {
"user_id": int(row[0]),
"username": row[1],
"is_admin": bool(row[2]),
"is_active": bool(row[3]),
"password_hash": row[4],
"created_at": row[2].isoformat() if isinstance(row[2], datetime) else (row[2] or None),
"is_admin": bool(row[3]),
"is_active": bool(row[4]),
"last_login": row[5].isoformat() if row[5] else None,
"created_at": row[6].isoformat() if isinstance(row[6], datetime) else (row[6] or None),
"has_password": bool(row[6]),
}
def get_user_by_id(user_id: int) -> Optional[Dict[str, Any]]:
"""Return single user dict or None."""
with _ensure_session() as session:
row = session.execute(text(
"SELECT user_id, username, created_at, is_admin, is_active, last_login, (password_hash IS NOT NULL) AS has_pw FROM users WHERE user_id = :u"
), {"u": user_id}).fetchone()
if not row:
return None
return {
"user_id": int(row[0]),
"username": row[1],
"created_at": row[2].isoformat() if isinstance(row[2], datetime) else (row[2] or None),
"is_admin": bool(row[3]),
"is_active": bool(row[4]),
"last_login": row[5].isoformat() if row[5] else None,
"has_password": bool(row[6]),
}
def delete_user_by_id(user_id: int) -> bool:
with _ensure_session() as session:
result = session.execute(
text("DELETE FROM users WHERE user_id = :u"), {"u": user_id})
session.commit()
rc = getattr(result, 'rowcount', None)
if rc is None:
# Unable to determine rowcount; assume success if no exception
return True
return rc > 0
# ---------------- Regions/Keywords helpers ---------------------------------
def upsert_region(name: str) -> int:
"""Get or create a region by name; return region_id."""
name = (name or "").strip()
@@ -713,6 +976,27 @@ def upsert_keyword(name: str) -> int:
return upsert_keyword(name)
def upsert_negative_keyword(name: str) -> int:
"""Get or create a negative keyword by name; return keyword_id."""
name = (name or "").strip().lower()
if not name:
raise ValueError("Negative keyword cannot be empty")
with _ensure_session() as session:
row = session.execute(text("SELECT keyword_id FROM negative_keywords WHERE name = :n"), {
"n": name}).fetchone()
if row:
return int(row[0])
session.execute(
text("INSERT INTO negative_keywords(name) VALUES (:n)"), {"n": name})
session.commit()
with _ensure_session() as session:
row2 = session.execute(text("SELECT keyword_id FROM negative_keywords WHERE name = :n"), {
"n": name}).fetchone()
if row2:
return int(row2[0])
return upsert_negative_keyword(name)
def set_user_regions(username: str, region_names: List[str]) -> None:
"""Replace user's preferred regions with given names."""
user_id = get_or_create_user(username)
@@ -771,6 +1055,34 @@ def set_user_keywords(username: str, keyword_names: List[str]) -> None:
session.commit()
def set_user_negative_keywords(username: str, keyword_names: List[str]) -> None:
"""Replace user's negative keywords with given names."""
user_id = get_or_create_user(username)
names = sorted({(n or "").strip().lower()
for n in keyword_names if (n or "").strip()})
keyword_ids: List[int] = [upsert_negative_keyword(n) for n in names]
if not keyword_ids and not names:
with _ensure_session() as session:
session.execute(
text("DELETE FROM user_negative_keywords WHERE user_id = :u"), {"u": user_id})
session.commit()
return
desired = set(keyword_ids)
with _ensure_session() as session:
rows = session.execute(text("SELECT keyword_id FROM user_negative_keywords WHERE user_id = :u"), {
"u": user_id}).fetchall()
current = set(int(r[0]) for r in rows)
to_add = desired - current
to_remove = current - desired
for kid in to_remove:
session.execute(text("DELETE FROM user_negative_keywords WHERE user_id = :u AND keyword_id = :k"), {
"u": user_id, "k": int(kid)})
for kid in to_add:
session.execute(text("INSERT INTO user_negative_keywords(user_id, keyword_id) VALUES(:u, :k)"), {
"u": user_id, "k": int(kid)})
session.commit()
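A quick sketch of how these two helpers pair up; the username is a placeholder. set_user_negative_keywords replaces the stored set, and get_user_negative_keywords returns it lower-cased and alphabetically sorted:
# Illustrative usage only; "alice" is a hypothetical username.
set_user_negative_keywords("alice", ["MLM", "Commission Only"])
get_user_negative_keywords("alice")
# -> ["commission only", "mlm"]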
def get_user_regions(username: str) -> List[Dict[str, str]]:
"""Return preferred region names for a user (empty if none)."""
with _ensure_session() as session:
@@ -811,6 +1123,26 @@ def get_user_keywords(username: str) -> List[Dict[str, str]]:
return [{"name": r[0], "color": r[1]} for r in rows]
def get_user_negative_keywords(username: str) -> List[str]:
"""Return negative keyword names for a user (empty if none)."""
with _ensure_session() as session:
row = session.execute(text("SELECT user_id FROM users WHERE username = :u"), {
"u": username}).fetchone()
if not row:
return []
user_id = int(row[0])
rows = session.execute(text(
"""
SELECT k.name
FROM negative_keywords k
INNER JOIN user_negative_keywords uk ON uk.keyword_id = k.keyword_id
WHERE uk.user_id = :u
ORDER BY k.name ASC
"""
), {"u": user_id}).fetchall()
return [r[0] for r in rows]
def get_all_regions() -> List[Dict[str, str]]:
"""Return all region names from regions table (sorted)."""
with _ensure_session() as session:
@@ -941,3 +1273,44 @@ def change_keyword_color(keyword_id: int, new_color: str) -> bool:
except Exception:
session.rollback()
return False
def stats_overview() -> Dict[str, Any]:
"""Return an overview of job DB statistics.
Returns a dict with keys:
- total_jobs: int
- total_keywords: int (distinct keywords in listings)
- total_regions: int (distinct regions in listings)
- jobs_per_keyword: List[{"keyword": str, "count": int}]
- jobs_per_region: List[{"region": str, "count": int}]
"""
with _ensure_session() as session:
total_jobs = session.execute(text(
"SELECT COUNT(*) FROM job_listings l INNER JOIN job_descriptions d ON l.job_id = d.job_id AND l.url = d.url"
)).scalar_one()
total_keywords = session.execute(text(
"SELECT COUNT(DISTINCT keyword) FROM job_listings WHERE keyword IS NOT NULL AND keyword != ''"
)).scalar_one()
total_regions = session.execute(text(
"SELECT COUNT(DISTINCT region) FROM job_listings WHERE region IS NOT NULL AND region != ''"
)).scalar_one()
rows = session.execute(text(
"SELECT COALESCE(keyword, '') AS keyword, COUNT(*) as cnt FROM job_listings l INNER JOIN job_descriptions d ON l.job_id = d.job_id AND l.url = d.url GROUP BY keyword ORDER BY cnt DESC"
)).fetchall()
jobs_per_keyword = [
{"keyword": r[0], "count": int(r[1])} for r in rows]
rows = session.execute(text(
"SELECT COALESCE(region, '') AS region, COUNT(*) as cnt FROM job_listings l INNER JOIN job_descriptions d ON l.job_id = d.job_id AND l.url = d.url GROUP BY region ORDER BY cnt DESC"
)).fetchall()
jobs_per_region = [{"region": r[0], "count": int(r[1])} for r in rows]
return {
"total_jobs": int(total_jobs or 0),
"total_keywords": int(total_keywords or 0),
"total_regions": int(total_regions or 0),
"jobs_per_keyword": jobs_per_keyword,
"jobs_per_region": jobs_per_region,
}
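For reference, a minimal sketch of consuming stats_overview(), using only the keys documented in its docstring:
# Minimal sketch: print the overview returned by stats_overview().
from web.db import stats_overview

stats = stats_overview()
print(f"{stats['total_jobs']} jobs, {stats['total_keywords']} keywords, "
      f"{stats['total_regions']} regions")
for row in stats["jobs_per_keyword"]:
    print(f"  {row['keyword'] or '(empty)'}: {row['count']}")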

130
web/email_service.py Normal file
View File

@@ -0,0 +1,130 @@
"""Email sending utilities for the jobs scraper."""
from __future__ import annotations
from email.message import EmailMessage
from typing import Iterable, Sequence
import smtplib
from web.utils import get_email_settings
class EmailConfigurationError(RuntimeError):
"""Raised when email settings are missing or invalid."""
class EmailDeliveryError(RuntimeError):
"""Raised when an email fails to send."""
def _normalize_addresses(addresses: Sequence[str] | str | None) -> list[str]:
if not addresses:
return []
if isinstance(addresses, str):
items = [addresses]
else:
items = list(addresses)
cleaned: list[str] = []
seen: set[str] = set()
for raw in items:
if not isinstance(raw, str):
continue
addr = raw.strip()
if not addr:
continue
lower = addr.lower()
if lower in seen:
continue
seen.add(lower)
cleaned.append(addr)
return cleaned
def _ensure_recipients(*recipient_groups: Iterable[str]) -> list[str]:
merged: list[str] = []
seen: set[str] = set()
for group in recipient_groups:
for addr in group:
lower = addr.lower()
if lower in seen:
continue
seen.add(lower)
merged.append(addr)
if not merged:
raise EmailConfigurationError(
"At least one recipient address is required")
return merged
def send_email(
*,
subject: str,
body: str,
to: Sequence[str] | str,
cc: Sequence[str] | str | None = None,
bcc: Sequence[str] | str | None = None,
reply_to: Sequence[str] | str | None = None,
settings: dict | None = None,
) -> bool:
"""Send an email using configured SMTP settings.
Returns True when a message is sent, False when email is disabled.
Raises EmailConfigurationError for invalid config and EmailDeliveryError for SMTP failures.
"""
config = settings or get_email_settings()
if not config.get("enabled"):
return False
smtp_cfg = config.get("smtp", {})
host = (smtp_cfg.get("host") or "").strip()
if not host:
raise EmailConfigurationError("SMTP host is not configured")
port = int(smtp_cfg.get("port", 587) or 587)
timeout = int(smtp_cfg.get("timeout", 30) or 30)
use_ssl = bool(smtp_cfg.get("use_ssl", False))
use_tls = bool(smtp_cfg.get("use_tls", True))
from_address = (config.get("from_address")
or smtp_cfg.get("username") or "").strip()
if not from_address:
raise EmailConfigurationError("From address is not configured")
to_list = _normalize_addresses(to)
cc_list = _normalize_addresses(cc)
bcc_list = _normalize_addresses(bcc)
reply_to_list = _normalize_addresses(reply_to)
all_recipients = _ensure_recipients(to_list, cc_list, bcc_list)
message = EmailMessage()
message["Subject"] = subject
message["From"] = from_address
message["To"] = ", ".join(to_list)
if cc_list:
message["Cc"] = ", ".join(cc_list)
if reply_to_list:
message["Reply-To"] = ", ".join(reply_to_list)
message.set_content(body)
username = (smtp_cfg.get("username") or "").strip()
password = smtp_cfg.get("password") or ""
client_cls = smtplib.SMTP_SSL if use_ssl else smtplib.SMTP
try:
with client_cls(host=host, port=port, timeout=timeout) as client:
client.ehlo()
if use_tls and not use_ssl:
client.starttls()
client.ehlo()
if username:
client.login(username, password)
client.send_message(message, from_addr=from_address,
to_addrs=all_recipients)
except EmailConfigurationError:
raise
except Exception as exc: # pragma: no cover - network errors depend on env
raise EmailDeliveryError(str(exc)) from exc
return True
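A minimal usage sketch for send_email, passing explicit settings rather than reading them from the config; the host, credentials, and addresses below are placeholders:
from web.email_service import send_email, EmailConfigurationError, EmailDeliveryError

settings = {
    "enabled": True,
    "from_address": "alerts@example.com",  # placeholder
    "smtp": {
        "host": "smtp.example.com",        # placeholder
        "port": 587,
        "username": "alerts@example.com",
        "password": "app-password",
        "use_tls": True,
    },
}
try:
    sent = send_email(subject="Test alert", body="Hello from the jobs scraper.",
                      to=["me@example.com"], settings=settings)
    print("sent" if sent else "email disabled")
except (EmailConfigurationError, EmailDeliveryError) as exc:
    print(f"email failed: {exc}")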

106
web/email_templates.py Normal file
View File

@@ -0,0 +1,106 @@
"""Email templates for job notifications."""
from __future__ import annotations
from datetime import datetime, UTC
from typing import Iterable, Mapping, Dict, Any
DEFAULT_DATETIME_FORMAT = "%Y-%m-%d %H:%M"
DEFAULT_JOB_ALERT_SUBJECT = "{count_label}{scope}"
DEFAULT_JOB_ALERT_BODY = (
"Hi,\n\n{intro_line}{jobs_section}\n\nGenerated at {timestamp} UTC.\n"
"You are receiving this message because job alerts are enabled.\n"
)
class _SafeDict(dict):
def __missing__(self, key: str) -> str:
return ""
def _format_template(template: str, context: Dict[str, Any]) -> str:
safe_context = _SafeDict(
{k: ("\n".join(str(v) for v in context[k]) if isinstance(
context[k], list) else context[k]) for k in context}
)
return template.format_map(safe_context)
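In practice this means unknown placeholders render as empty strings and list values are joined with newlines, so a malformed template cannot raise KeyError. Two illustrative calls:
_format_template("{count_label}{scope}", {"count_label": "3 new jobs"})
# -> "3 new jobs"        (missing {scope} becomes "")
_format_template("{jobs_lines}", {"jobs_lines": ["1. A", "2. B"]})
# -> "1. A\n2. B"        (lists are joined with newlines)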
def render_job_alert_email(
jobs: Iterable[Mapping[str, object]],
*,
region: str | None = None,
keyword: str | None = None,
generated_at: datetime | None = None,
template_override: Mapping[str, str] | None = None,
) -> dict[str, Any]:
"""Render the subject/body for a job alert email.
Returns a dict with subject/body strings and the context used to render them.
"""
job_list = list(jobs)
generated_at = generated_at or datetime.now(UTC)
timestamp = generated_at.strftime(DEFAULT_DATETIME_FORMAT)
scope_parts = []
if region:
scope_parts.append(f"region: {region}")
if keyword:
scope_parts.append(f"keyword: {keyword}")
scope = " (" + ", ".join(scope_parts) + ")" if scope_parts else ""
job_lines: list[str] = []
for index, job in enumerate(job_list, start=1):
title = str(job.get("title", "Untitled"))
company = str(job.get("company", "Unknown company"))
location = str(job.get("location", "N/A"))
url = str(job.get("url", ""))
line = f"{index}. {title}{company} ({location})"
job_lines.append(line)
if url:
job_lines.append(f" {url}")
if job_lines:
jobs_section = "\n" + "\n".join(job_lines)
else:
jobs_section = "\nNo jobs matched this alert."
jobs_message = jobs_section.strip()
context: Dict[str, Any] = {
"count": len(job_list),
"count_label": "No new jobs" if not job_list else f"{len(job_list)} new jobs",
"scope": scope,
"region": region or "",
"keyword": keyword or "",
"timestamp": timestamp,
"generated_at": generated_at,
"intro_line": "Here are the latest jobs discovered by the scraper:",
"jobs_message": jobs_message,
"jobs_section": jobs_section,
"jobs_lines": job_lines,
"has_jobs": bool(job_list),
}
template = template_override
if template is None:
try:
from web.db import get_email_template_by_slug
template = get_email_template_by_slug("job-alert")
except Exception:
template = None
template_subject = (template or {}).get(
"subject") or DEFAULT_JOB_ALERT_SUBJECT
template_body = (template or {}).get("body") or DEFAULT_JOB_ALERT_BODY
subject = _format_template(template_subject, context)
body = _format_template(template_body, context)
result = {
"subject": subject,
"body": body,
"context": context,
"template_slug": (template or {}).get("slug", "job-alert"),
}
return result
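A short sketch tying the two email modules together; the job dict and recipient address are fabricated, and email must be enabled in the config for send_email to actually deliver anything:
from web.email_templates import render_job_alert_email
from web.email_service import send_email

jobs = [
    {"title": "Forklift Operator", "company": "Acme", "location": "Portland",
     "url": "https://example.org/job/1"},
]
rendered = render_job_alert_email(jobs, region="portland", keyword="warehouse")
send_email(subject=rendered["subject"], body=rendered["body"],
           to=["me@example.com"])  # placeholder recipient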

View File

@@ -1,7 +1,82 @@
from datetime import datetime, UTC
from bs4 import BeautifulSoup
from typing import List, Dict, Set
from web.utils import get_base_url, cache_page, safe_get_text, safe_get_attr, is_cached, get_cached_content, make_request_with_retry
from urllib.parse import urlparse, parse_qs
import re
from web.utils import (
get_base_url,
safe_get_text,
safe_get_attr,
make_request_with_retry,
get_negative_keywords,
)
def extract_contact_info(reply_url) -> Dict[str, str]:
"""Extract contact information from reply URL.
Parses mailto links, phone links, and contact form URLs to extract:
- email: Email address (from mailto links)
- phone: Phone number (from tel links or URL parameters)
- contact_name: Contact person name (if available in URL parameters)
Returns a dict with email, phone, and contact_name keys (values may be "N/A").
"""
contact_info = {
"email": "N/A",
"phone": "N/A",
"contact_name": "N/A"
}
# Handle None or empty cases
if not reply_url or reply_url == "N/A":
return contact_info
reply_url = str(reply_url).strip()
if not reply_url or reply_url == "N/A":
return contact_info
try:
# Check for mailto links
if reply_url.startswith("mailto:"):
email_part = reply_url.replace("mailto:", "")
# Extract email (may contain ?subject=...)
email = email_part.split("?")[0]
contact_info["email"] = email
return contact_info
# Check for tel links
if reply_url.startswith("tel:"):
phone = reply_url.replace("tel:", "")
contact_info["phone"] = phone
return contact_info
# Parse as URL
if reply_url.startswith("http"):
parsed = urlparse(reply_url)
params = parse_qs(parsed.query)
# Try to extract email from parameters
for key in ["email", "from_email", "sender_email", "contact_email"]:
if key in params:
contact_info["email"] = params[key][0]
break
# Try to extract phone from parameters
for key in ["phone", "tel", "telephone"]:
if key in params:
contact_info["phone"] = params[key][0]
break
# Try to extract contact name from parameters
for key in ["contact_name", "from_name", "name"]:
if key in params:
contact_info["contact_name"] = params[key][0]
break
except Exception:
pass
return contact_info
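Illustrative inputs and the resulting dicts for the three URL shapes the parser recognises (addresses are made up):
extract_contact_info("mailto:hr@example.com?subject=Job")
# -> {"email": "hr@example.com", "phone": "N/A", "contact_name": "N/A"}
extract_contact_info("tel:+1-555-0100")
# -> {"email": "N/A", "phone": "+1-555-0100", "contact_name": "N/A"}
extract_contact_info("https://example.org/reply?contact_email=hr@example.com&from_name=Sam")
# -> {"email": "hr@example.com", "phone": "N/A", "contact_name": "Sam"}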
def scrape_listings_page(listing, region: str, keyword: str, seen_urls: Set[str]) -> List:
@@ -40,6 +115,16 @@ def scrape_job_page(content: str, url: str) -> Dict:
"""Scrape job details from a job listing page."""
soup = BeautifulSoup(content, "html.parser")
# Extract reply button
reply_button = soup.find("button", class_="reply-button")
if reply_button:
reply_url = safe_get_attr(reply_button, "data-href")
else:
reply_url = "N/A"
# Extract contact information from reply URL
contact_info = extract_contact_info(reply_url)
# Extract each field
title = safe_get_text(soup.find("h1", class_="postingtitle"))
company = safe_get_text(soup.find("h2", class_="company-name"))
@@ -80,6 +165,30 @@ def scrape_job_page(content: str, url: str) -> Dict:
job_id = ""
posted_time = ""
# Negative keyword detection
negative_keyword_match = None
negative_match_field = None
negative_keywords = get_negative_keywords()
if negative_keywords:
fields_to_check = {
"title": title or "",
"company": company or "",
"location": location or "",
"description": description or "",
}
for keyword in negative_keywords:
if not keyword:
continue
pattern = re.compile(
r"\b" + re.escape(keyword) + r"\b", re.IGNORECASE)
for field_name, field_value in fields_to_check.items():
if field_value and pattern.search(field_value):
negative_keyword_match = keyword
negative_match_field = field_name
break
if negative_keyword_match:
break
return {
"url": url,
"title": title,
@@ -87,7 +196,14 @@ def scrape_job_page(content: str, url: str) -> Dict:
"location": location,
"description": description,
"id": job_id,
"posted_time": posted_time
"posted_time": posted_time,
"reply_url": reply_url,
"contact_email": contact_info["email"],
"contact_phone": contact_info["phone"],
"contact_name": contact_info["contact_name"],
"negative_keyword_match": negative_keyword_match,
"negative_match_field": negative_match_field,
"is_negative_match": bool(negative_keyword_match),
}
@@ -108,14 +224,7 @@ def scrape_job_data(content: str, region: str, keyword: str, seen_urls: Set[str]
def process_region_keyword(region: str, keyword: str, seen_urls: Set[str]) -> List[List]:
"""Process a single region and keyword."""
url = get_base_url().format(region=region, keyword=keyword.replace(" ", "+"))
if is_cached(url):
content = get_cached_content(url)
cache_status = "CACHED"
else:
content = make_request_with_retry(url, 3)
if content is None:
return []
cache_page(url, content)
cache_status = "FETCHED"
_ = cache_status # no-op to silence unused var
content = make_request_with_retry(url, 1)
if content is None:
return []
return scrape_job_data(content, region, keyword, seen_urls)

View File

@@ -41,12 +41,16 @@ function scrape(event) {
event.preventDefault(); // Prevent the default form submission
updateScrapeInfo("Scraping in progress...", "blue");
fetch("/scrape")
.then((response) => response.json())
// expect HTML response containing "Scraping completed successfully!"
.then((response) => response.text())
.then((data) => {
if (data.status) {
updateScrapeInfo(data.status, "green");
if (data.includes("Scraping completed successfully!")) {
updateScrapeInfo("Scraping completed successfully!", "green");
} else {
updateScrapeInfo("Scraping failed. Please try again.", "red");
updateScrapeInfo(
"Scraping failed or timed out. Please try again.",
"red"
);
}
})
.catch((error) => console.error("Error:", error));

44
web/static/scrape.js Normal file
View File

@@ -0,0 +1,44 @@
function startScrape() {
const output = document.getElementById("output");
const startButton = document.getElementById("start-scrape");
output.textContent = "Starting scrape...\n";
startButton.disabled = true;
startButton.textContent = "Scraping...";
fetch("/scrape")
.then((response) => {
const reader = response.body.getReader();
const decoder = new TextDecoder();
function readStream() {
reader
.read()
.then(({ done, value }) => {
if (done) {
output.textContent += "\nScraping completed!";
startButton.disabled = false;
startButton.textContent = "Start Scraping";
return;
}
const chunk = decoder.decode(value, { stream: true });
output.textContent += chunk;
output.scrollTop = output.scrollHeight;
readStream();
})
.catch((error) => {
output.textContent += `\nError: ${error.message}`;
startButton.disabled = false;
startButton.textContent = "Start Scraping";
});
}
readStream();
})
.catch((error) => {
output.textContent = `Error starting scrape: ${error.message}`;
startButton.disabled = false;
startButton.textContent = "Start Scraping";
});
}

View File

@@ -1,4 +1,22 @@
/* javascript form handling */
document.addEventListener("DOMContentLoaded", function () {
const newNkInput = document.getElementById("new-negative-keyword");
if (newNkInput) {
newNkInput.addEventListener("input", function () {
const val = this.value.trim();
const existing = Array.from(
document.querySelectorAll('input[name="negative_keyword"]')
).map((el) => el.value);
if (existing.includes(val)) {
this.setCustomValidity("Keyword already exists");
this.reportValidity();
} else {
this.setCustomValidity("");
}
});
}
});
document
.getElementById("user-settings-form")
.addEventListener("submit", function (event) {
@@ -10,11 +28,15 @@ document
// Collect selected regions and keywords
const selectedRegions = [];
const selectedKeywords = [];
const selectedNegativeKeywords = [];
formData.forEach((value, key) => {
if (key === "region") {
selectedRegions.push(value);
} else if (key === "keyword") {
selectedKeywords.push(value);
} else if (key === "negative_keyword") {
selectedNegativeKeywords.push(value);
}
});
@@ -30,10 +52,21 @@ document
selectedKeywords.push(newKeyword);
}
// Add new negative keyword if provided
const newNegativeKeyword = formData.get("new-negative-keyword").trim();
if (newNegativeKeyword) {
if (selectedNegativeKeywords.includes(newNegativeKeyword)) {
alert("Negative keyword already exists!");
return;
}
selectedNegativeKeywords.push(newNegativeKeyword);
}
// Prepare data to send
const dataToSend = {
regions: selectedRegions,
keywords: selectedKeywords,
negative_keywords: selectedNegativeKeywords,
csrf_token: formData.get("csrf_token"),
};

View File

@@ -0,0 +1,62 @@
{% extends 'base.html' %} {% block content %}
<h2>Email Subscriptions</h2>
<section>
<h3>Add Subscription</h3>
<form method="post">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" />
<input type="hidden" name="action" value="subscribe" />
<label for="email">Email address</label>
<input
type="email"
id="email"
name="email"
placeholder="alerts@example.com"
required
/>
<button type="submit">Subscribe</button>
</form>
</section>
<section>
<h3>Current Recipients</h3>
{% if not subscriptions %}
<p>No subscriptions yet. Add one above to start sending alerts.</p>
<p>You can customize alert content from the <a href="{{ url_for('admin_email_templates') }}">Email Templates</a> page.</p>
{% else %}
<p>{{ total_active }} active of {{ total }} total.</p>
<table>
<thead>
<tr>
<th>Email</th>
<th>Status</th>
<th>Created</th>
<th>Updated</th>
<th>Action</th>
</tr>
</thead>
<tbody>
{% for sub in subscriptions %}
<tr>
<td>{{ sub.email }}</td>
<td>{{ 'Active' if sub.is_active else 'Inactive' }}</td>
<td>{{ sub.created_at }}</td>
<td>{{ sub.updated_at }}</td>
<td>
<form method="post" style="display: inline-flex; gap: 0.5rem">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" />
<input type="hidden" name="email" value="{{ sub.email }}" />
{% if sub.is_active %}
<input type="hidden" name="action" value="unsubscribe" />
<button type="submit">Deactivate</button>
{% else %}
<input type="hidden" name="action" value="reactivate" />
<button type="submit">Reactivate</button>
{% endif %}
</form>
</td>
</tr>
{% endfor %}
</tbody>
</table>
{% endif %}
</section>
{% endblock %}

View File

@@ -0,0 +1,102 @@
{% extends 'base.html' %}
{% block content %}
<h2>Email Templates</h2>
<section>
<h3>Available Templates</h3>
{% if not templates %}
<p>No templates found. Create one below to get started.</p>
{% else %}
<table>
<thead>
<tr>
<th>Name</th>
<th>Slug</th>
<th>Status</th>
<th>Updated</th>
<th>Actions</th>
</tr>
</thead>
<tbody>
{% for template in templates %}
<tr>
<td>{{ template.name }}</td>
<td>{{ template.slug }}</td>
<td>{{ 'Active' if template.is_active else 'Inactive' }}</td>
<td>{{ template.updated_at or template.created_at or '' }}</td>
<td style="display: flex; gap: 0.5rem;">
<a class="button" href="{{ url_for('admin_email_templates', template_id=template.template_id) }}">Edit</a>
<a class="button" href="{{ url_for('admin_email_templates', preview_id=template.template_id) }}">Preview</a>
<form method="post" onsubmit="return confirm('Delete template {{ template.name }}?');">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" />
<input type="hidden" name="action" value="delete" />
<input type="hidden" name="template_id" value="{{ template.template_id }}" />
<button type="submit">Delete</button>
</form>
</td>
</tr>
{% endfor %}
</tbody>
</table>
{% endif %}
</section>
<section>
<h3>{{ 'Edit Template' if editing else 'Create Template' }}</h3>
<form method="post">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" />
<input type="hidden" name="action" value="{{ 'update' if editing else 'create' }}" />
{% if editing %}
<input type="hidden" name="template_id" value="{{ editing.template_id }}" />
{% endif %}
<div>
<label for="name">Name</label>
<input type="text" id="name" name="name" value="{{ editing.name if editing else '' }}" required />
</div>
<div>
<label for="slug">Slug</label>
<input type="text" id="slug" name="slug" placeholder="job-alert" value="{{ editing.slug if editing else '' }}" />
<small>Leave blank to reuse the name. Slug must be URL friendly (letters, numbers, dashes).</small>
</div>
<div>
<label for="subject">Subject Template</label>
<input type="text" id="subject" name="subject" value="{{ editing.subject if editing else '' }}" required />
</div>
<div>
<label for="body">Body Template</label>
<textarea id="body" name="body" rows="12" required>{{ editing.body if editing else '' }}</textarea>
</div>
<div>
<label>
<input type="checkbox" name="is_active" {% if editing is none or editing.is_active %}checked{% endif %} />
Active
</label>
</div>
<button type="submit">{{ 'Update Template' if editing else 'Create Template' }}</button>
{% if editing %}
<a class="button" href="{{ url_for('admin_email_templates') }}">Cancel</a>
{% endif %}
</form>
<aside>
<h4>Available placeholders</h4>
<ul>
<li><code>{count}</code> number of jobs in the alert</li>
<li><code>{count_label}</code> "No new jobs" or "X new jobs"</li>
<li><code>{scope}</code> formatted region/keyword context</li>
<li><code>{region}</code>, <code>{keyword}</code></li>
<li><code>{timestamp}</code> formatted timestamp</li>
<li><code>{jobs_section}</code> newline-prefixed block of job entries</li>
<li><code>{jobs_message}</code> jobs block without leading newline</li>
</ul>
</aside>
</section>
{% if preview %}
<section>
<h3>Preview: {{ preview_template.name if preview_template else 'Job Alert' }}</h3>
<article>
<h4>Subject</h4>
<pre>{{ preview.subject }}</pre>
<h4>Body</h4>
<pre>{{ preview.body }}</pre>
</article>
</section>
{% endif %}
{% endblock %}

View File

@@ -0,0 +1,46 @@
{% extends 'base.html' %} {% block content %}
<div id="admin-stats">
<h2>Database Statistics</h2>
<div class="stats-summary">
<p><strong>Total jobs:</strong> {{ stats.total_jobs }}</p>
<p><strong>Total keywords:</strong> {{ stats.total_keywords }}</p>
<p><strong>Total regions:</strong> {{ stats.total_regions }}</p>
</div>
<h3>Jobs per keyword</h3>
<table>
<thead>
<tr>
<th>Keyword</th>
<th>Count</th>
</tr>
</thead>
<tbody>
{% for row in stats.jobs_per_keyword %}
<tr>
<td>{{ row.keyword or '(empty)' }}</td>
<td>{{ row.count }}</td>
</tr>
{% endfor %}
</tbody>
</table>
<h3>Jobs per region</h3>
<table>
<thead>
<tr>
<th>Region</th>
<th>Count</th>
</tr>
</thead>
<tbody>
{% for row in stats.jobs_per_region %}
<tr>
<td>{{ row.region or '(empty)' }}</td>
<td>{{ row.count }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% endblock %}

View File

@@ -0,0 +1,114 @@
{% extends 'base.html' %} {% block content %}
<div id="user-details">
{% if not user %}
<h2>Create new user</h2>
<form id="new-user-form" method="post" action="{{ url_for('admin_users') }}">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" />
<div id="user-info">
<p>
<strong>Username:</strong>
<input type="text" name="username" required />
</p>
<p>
<strong>Password:</strong>
<input type="password" name="password" required />
</p>
<p>
<strong>Admin:</strong>
<input type="checkbox" name="is_admin" />
</p>
<p>
<strong>Active:</strong>
<input type="checkbox" name="is_active" />
</p>
<button type="submit">Create User</button>
</div>
</form>
{% else %}
<h2>User {{ user.username }}</h2>
<form
id="user-form"
method="post"
action="{{ url_for('admin_user', user_id=user.user_id) }}"
>
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" />
<input type="hidden" name="user_id" value="{{ user.user_id }}" />
<input type="hidden" name="username" value="{{ user.username }}" />
<div id="user-info">
<p><strong>ID:</strong> {{ user.user_id }}</p>
<p><strong>Username:</strong> {{ user.username }}</p>
<p><strong>Created At:</strong> {{ user.created_at }}</p>
<p><strong>Last Login:</strong> {{ user.last_login }}</p>
<p>
<strong>Admin:</strong>
<input type="checkbox" name="is_admin" {{ 'checked' if user.is_admin
else '' }} />
</p>
<p>
<strong>Active:</strong>
<input type="checkbox" name="is_active" {{ 'checked' if user.is_active
else '' }} />
</p>
<p>
<strong>Has Password:</strong> {{ '✅' if user.has_password else '❌' }}
</p>
<p>
<strong>New Password:</strong>
<input type="password" id="new_password" name="new_password" />
</p>
<button type="submit">Save</button>
</div>
</form>
</div>
<script>
const userForm = document.getElementById("user-form");
userForm.addEventListener("submit", function (event) {
const userId = userForm.querySelector('input[name="user_id"]').value;
event.preventDefault(); // Prevent the default form submission
updateUser(userId);
});
function updateUser(userId) {
const passwordInput = document.getElementById("new_password");
const formData = userForm.elements;
const username = formData.username.value;
const password = passwordInput.value;
const isAdmin = formData.is_admin.checked;
const isActive = formData.is_active.checked;
const hasPassword = passwordInput.value.trim() !== "";
fetch("/admin/user/" + userId, {
method: "POST",
headers: {
"Content-Type": "application/json",
"X-CSRF-Token": formData.csrf_token.value,
},
body: JSON.stringify({
user_id: userId,
password: password,
username: username,
is_admin: isAdmin,
is_active: isActive,
}),
})
.then((response) => {
if (response.ok) {
alert("User updated successfully");
// Clear the password field after successful update
passwordInput.value = "";
// Update the 'has_password' indicator if such an input exists
const hasPasswordEl = userForm.querySelector('input[name="has_password"]');
if (hasPasswordEl) {
hasPasswordEl.value = hasPassword ? "✅" : "❌";
}
} else {
alert("Error updating user");
}
})
.catch((error) => {
console.error("Error:", error);
alert("Error updating user");
});
}
</script>
{% endif %} {% endblock %} {% block footer_scripts %} {% endblock %}

View File

@@ -1,139 +1,100 @@
{% extends 'base.html' %} {% block content %}
<div id="users">
<h2>Users</h2>
<form id="user-form" method="post" action="{{ url_for('admin_users') }}">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" />
<table>
<thead>
<tr>
<th>ID</th>
<th>Username</th>
<th>Admin</th>
<th>Active</th>
<th colspan="2">Password</th>
<th>Created</th>
<th>Last Login</th>
<th></th>
</tr>
</thead>
<tbody>
{% for u in users %}
<tr class="user-row" data-user-id="{{ u.user_id }}">
<td>
{{ u.user_id }}<input
type="hidden"
name="user_id"
value="{{ u.user_id }}"
/>
</td>
<td>
<input
type="text"
name="username"
value="{{ u.username }}"
required
/>
</td>
<td>
<input type="checkbox" name="is_admin" {{ 'checked' if u.is_admin
else '' }} />
</td>
<td>
<input type="checkbox" name="is_active" {{ 'checked' if u.is_active
else '' }} />
</td>
<td>{{ '✅' if u.has_password else '❌' }}</td>
<td><input type="password" name="password" /></td>
<td>{{ u.created_at }}</td>
<td>{{ u.last_login or 'never' }}</td>
<td>
<button type="submit" data-user-id="{{ u.user_id }}">Save</button>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</form>
</div>
<h3>Create / Update User</h3>
<form
id="create-update-user-form"
method="post"
action="{{ url_for('admin_users') }}"
>
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" />
<label>Username <input type="text" name="username" required /></label>
<label>Password <input type="password" name="password" /></label>
<label>Admin <input type="checkbox" name="is_admin" value="1" /></label>
<label
>Active <input type="checkbox" name="is_active" value="1" checked
/></label>
<button type="submit">Save</button>
</form>
<table>
<thead>
<tr>
<th>ID</th>
<th>Username</th>
<th>Admin</th>
<th>Active</th>
<th>Password</th>
<th>Created</th>
<th>Last Login</th>
<th>Edit</th>
<th>Delete</th>
</tr>
</thead>
<tbody>
{% for u in users %}
<tr class="user-row" data-user-id="{{ u.user_id }}">
<td>{{ u.user_id }}</td>
<td>
<a href="{{ url_for('admin_user', user_id=u.user_id) }}"
>{{ u.username }}</a
>
</td>
<td>{{ '✅' if u.is_admin else '❌' }}</td>
<td>{{ '✅' if u.is_active else '❌' }}</td>
<td>{{ '✅' if u.has_password else '❌' }}</td>
<td>{{ u.created_at }}</td>
<td>{{ u.last_login or 'never' }}</td>
<td>
<button
type="button"
class="edit-user"
data-user-id="{{ u.user_id }}"
onclick="editUser({{ u.user_id }})"
>
Edit
</button>
</td>
<td>
<button
type="button"
class="delete-user"
data-user-id="{{ u.user_id }}"
onclick="deleteUser({{ u.user_id }})"
>
Delete
</button>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
<h2>Create New User</h2>
<a href="{{ url_for('admin_user', user_id='new') }}">Create User</a>
{% endblock %} {% block footer_scripts %}
<script>
function updateUser(userId) {
const row = document.querySelector(`.user-row[data-user-id="${userId}"]`);
const passwordInput = row.querySelector('input[name="password"]');
const hasPassword =
row.querySelector("td:nth-child(5)").textContent.trim() === "✅";
const formData = row.querySelector("form").elements;
const username = formData.username.value;
const password = hasPassword ? passwordInput.value : undefined;
const isAdmin = formData.is_admin.checked;
const isActive = formData.is_active.checked;
fetch("/admin/users", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
user_id: userId,
password: password,
username: username,
is_admin: isAdmin,
is_active: isActive,
csrf_token: formData.csrf_token.value,
}),
})
.then((response) => {
if (response.ok) {
alert("User updated successfully");
// Clear the password field after successful update
passwordInput.value = "";
} else {
alert("Error updating user");
}
function editUser(userId) {
window.location.href = `/admin/user/${userId}`;
}
function deleteUser(userId) {
if (
confirm(
"Are you sure you want to delete this user? This action cannot be undone."
)
) {
fetch(`/admin/user/${userId}/delete`, {
method: "POST",
headers: {
"Content-Type": "application/json",
"X-CSRFToken": document.querySelector('input[name="csrf_token"]')
.value,
},
})
.catch((error) => {
console.error("Error:", error);
alert("Error updating user");
});
.then((response) => {
if (response.ok) {
// Remove the user row from the table
const row = document.querySelector(
`.user-row[data-user-id="${userId}"]`
);
if (row) {
row.remove();
}
} else {
alert("Error deleting user.");
}
})
.catch((error) => {
console.error("Error:", error);
alert("Error deleting user.");
});
}
}
function initUserForm() {
const form = document.getElementById("user-form");
const createUpdateForm = document.getElementById("create-update-user-form");
form.addEventListener("submit", function (event) {
const userId = event.target.querySelector('input[name="user_id"]').value;
event.preventDefault(); // Prevent the default form submission
updateUser(userId);
});
form.addEventListener("click", function (event) {
const userId = event.target.closest(".user-row").dataset.userId;
updateUser(userId);
});
createUpdateForm.addEventListener("submit", function (event) {
const passwordInput = createUpdateForm.querySelector(
'input[name="password"]'
);
});
}
initUserForm();
</script>
{% endblock %}

View File

@@ -16,15 +16,21 @@
<header>
<h1><a href="/">{{ title or 'Admin' }}</a></h1>
<nav>
{% if username %}<span>Hi, {{ username }}</span> | {% endif %}
<a href="{{ url_for('index') }}">Home</a> |
<a href="{{ url_for('user_settings') }}">Preferences</a>
{% if current_user and current_user.is_admin %} |
<a href="{{ url_for('admin_taxonomy') }}">Taxonomy</a> |
<a href="{{ url_for('admin_users') }}">Users</a> {% endif %} {% if
session.get('username') %} |
<a href="{{ url_for('logout') }}">Logout</a> {% else %} |
<a href="{{ url_for('login') }}">Login</a>{% endif %}
<div id="navigation">
{% if username %}<span>Hi, {{ username }}</span> | {% endif %}
<a href="{{ url_for('index') }}">Home</a> |
<a href="{{ url_for('user_settings') }}">Preferences</a>
{% if current_user and current_user.is_admin %} |
<a href="{{ url_for('scrape_page') }}">Scrape Jobs</a> |
<a href="{{ url_for('admin_taxonomy') }}">Taxonomy</a> |
<a href="{{ url_for('admin_stats') }}">Statistics</a> |
<a href="{{ url_for('admin_emails') }}">Email Alerts</a> |
<a href="{{ url_for('admin_email_templates') }}">Email Templates</a> |
<a href="{{ url_for('admin_users') }}">Users</a> {% endif %} {% if
session.get('username') %} |
<a href="{{ url_for('logout') }}">Logout</a> {% else %} |
<a href="{{ url_for('login') }}">Login</a>{% endif %}
</div>
</nav>
{% with messages = get_flashed_messages() %} {% if messages %}
<ul>

View File

@@ -44,7 +44,8 @@
<div id="jobs">
{% for job in jobs %}
<div class="job">
<h3><a href="{{ job['url'] }}" target="_blank">{{ job['title'] }}</a></h3>
<!--<h3><a href="{{ job['url'] }}" target="_blank">{{ job['title'] }}</a></h3>-->
<h3><a href="{{ url_for('job_by_id', job_id=job['id']) }}" target="_blank">{{ job['title'] }}</a></h3>
<p class="job-posted-time">{{ job['posted_time'] }}</p>
<span class="job-region region-{{ job['region'] }}">{{ job['region'] }}</span>
<span class="job-keyword keyword-{{ job['keyword']|replace(' ', '')|lower }}">{{ job['keyword'] }}</span>

View File

@@ -23,13 +23,5 @@ styles %}{% endblock %} {% block content %}
>{{ job.title }}</a
>
</p>
{% if job.file_path_abs or job.file_path %}
<p>
<strong>Cached copy:</strong>
<a href="{{ url_for('serve_cached', job_id=job.id) }}" target="_blank"
>View cached copy</a
>
</p>
{% endif %}
</div>
{% endblock %}

22
web/templates/scrape.html Normal file
View File

@@ -0,0 +1,22 @@
{% extends "base.html" %} {% block title %}Scrape Jobs{% endblock %} {% block
content %}
<div id="scrape-container">
<h2>Job Scraping Progress</h2>
<button id="start-scrape" onclick="startScrape()">Start Scraping</button>
<div
id="output"
style="
margin-top: 20px;
padding: 10px;
border: 1px solid #ccc;
height: 400px;
overflow-y: auto;
background-color: #f9f9f9;
font-family: monospace;
white-space: pre-wrap;
"
></div>
</div>
{% endblock %} {% block scripts %}
<script src="{{ url_for('static', filename='scrape.js') }}"></script>
{% endblock %}

View File

@@ -77,6 +77,29 @@ block content %}
<p>No keywords available. Ask an admin to add some.</p>
{% endif %}
</fieldset>
<fieldset>
<legend>Negative Keywords</legend>
<p>
<small>Add new Negative Keyword:</small>
<input
type="text"
name="new-negative-keyword"
id="new-negative-keyword"
value=""
placeholder="Type a keyword and save to add"
size="30"
/>
</p>
{% if user_negative_keywords %} {% for nk in user_negative_keywords %}
<label style="display: block">
<input type="checkbox" name="negative_keyword" value="{{ nk }}" checked />
{{ nk }}
</label>
{% endfor %} {% else %}
<p>No negative keywords set.</p>
{% endif %}
<p><small>Uncheck to remove.</small></p>
</fieldset>
<button type="submit">Save</button>
</form>
{% endblock %} {% block footer_scripts %}

View File

@@ -2,7 +2,7 @@
Utility functions for the Craigslist scraper.
"""
from typing import Any, Optional as _Optional
from typing import Any, Optional, List, Dict
from datetime import datetime, UTC
import json
import os
@@ -10,7 +10,6 @@ import random
import re
import requests
import time
from typing import Optional, List, Dict
def get_config_file() -> str:
@@ -94,10 +93,6 @@ def get_paths() -> dict:
return get_config().get('paths', {})
def get_cache_dir() -> str:
return get_paths().get('cache_dir', 'cache')
def get_logs_dir() -> str:
return get_paths().get('logs_dir', 'logs')
@@ -130,9 +125,64 @@ def get_base_url() -> str:
return get_config().get('scraper', {}).get('base_url', "https://{region}.craigslist.org/search/jjj?query={keyword}&sort=rel")
def ensure_cache_dir():
"""Ensure cache directory exists."""
os.makedirs(get_cache_dir(), exist_ok=True)
def get_negative_keywords() -> List[str]:
"""Return normalized list of negative keywords from config."""
raw = get_config().get('scraper', {}).get('negative_keywords', [])
if not isinstance(raw, list):
return []
cleaned: List[str] = []
for item in raw:
if not isinstance(item, str):
continue
val = item.strip()
if not val:
continue
cleaned.append(val.lower())
return cleaned
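The matching config fragment would look roughly like this (shown here as a Python dict; the keyword values are illustrative, and case and surrounding whitespace are normalised away):
# Illustrative scraper config fragment; on disk it lives in the file
# returned by get_config_file().
{"scraper": {"negative_keywords": ["Commission Only", " door to door ", "MLM"]}}
# get_negative_keywords() -> ["commission only", "door to door", "mlm"]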
def get_email_settings() -> Dict[str, Any]:
"""Return normalized email settings from config."""
cfg = get_config().get('email', {})
if not isinstance(cfg, dict):
cfg = {}
raw_smtp = cfg.get('smtp', {}) if isinstance(cfg.get('smtp'), dict) else {}
raw_recipients = cfg.get('recipients', [])
def _to_int(value, default):
try:
return int(value)
except (TypeError, ValueError):
return default
recipients: List[str] = []
if isinstance(raw_recipients, list):
for item in raw_recipients:
if isinstance(item, str):
addr = item.strip()
if addr:
recipients.append(addr)
smtp = {
'host': (raw_smtp.get('host') or '').strip(),
'port': _to_int(raw_smtp.get('port', 587), 587),
'username': (raw_smtp.get('username') or '').strip(),
'password': raw_smtp.get('password') or '',
'use_tls': bool(raw_smtp.get('use_tls', True)),
'use_ssl': bool(raw_smtp.get('use_ssl', False)),
'timeout': _to_int(raw_smtp.get('timeout', 30), 30),
}
if smtp['port'] <= 0:
smtp['port'] = 587
if smtp['timeout'] <= 0:
smtp['timeout'] = 30
return {
'enabled': bool(cfg.get('enabled', False)),
'from_address': (cfg.get('from_address') or '').strip(),
'smtp': smtp,
'recipients': recipients,
}
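A matching `email` section might look like the fragment below (addresses and credentials are placeholders); any missing or invalid values fall back to the defaults coded above:
# Illustrative config fragment for get_email_settings().
{
    "email": {
        "enabled": True,
        "from_address": "alerts@example.com",  # placeholder
        "recipients": ["me@example.com"],      # placeholder
        "smtp": {
            "host": "smtp.example.com",        # placeholder
            "port": 587,
            "username": "alerts@example.com",
            "password": "app-password",
            "use_tls": True,
        },
    }
}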
def now_iso() -> str:
@@ -174,12 +224,6 @@ def get_url_from_filename(name: str) -> str:
return url_guess
def get_cached_content(url: str) -> str:
"""Get cached content for URL."""
with open(get_cache_path(url), "r", encoding="utf-8") as f:
return f.read()
def safe_get_text(element, default="N/A"):
"""Safely extract text from BeautifulSoup element."""
return element.get_text(strip=True) if element else default
@@ -195,53 +239,6 @@ def get_random_delay(min_delay: int = get_min_delay(), max_delay: int = get_max_
return random.uniform(min_delay, max_delay)
def get_cache_path(url: str) -> str:
"""Get cache file path for URL."""
return os.path.join(get_cache_dir(), f"{get_filename_from_url(url)}.html")
def cache_page(url: str, content: str):
"""Cache the page content with a timestamp."""
cache_path = get_cache_path(url)
with open(cache_path, "w", encoding="utf-8") as f:
f.write(content)
# Update the file's modification time to the current time
os.utime(cache_path, None)
def is_cached(url: str) -> bool:
"""Check if the page is cached and not older than 24 hours."""
cache_path = get_cache_path(url)
if not os.path.isfile(cache_path):
return False
# Check the file's age if it's a search result page
if 'search' in url:
file_age = time.time() - os.path.getmtime(cache_path)
if file_age > 24 * 3600: # 24 hours in seconds
return False
return True
def is_cache_stale(last_modified: str, days: int = 1) -> bool:
"""Check if the cached page is stale (older than 24 hours)."""
if not last_modified:
return True
last_datetime = datetime.fromisoformat(last_modified)
file_age = time.time() - last_datetime.timestamp()
return file_age > days * 24 * 3600 # days in seconds
def delete_cached_page(url: str):
cache_fp = get_cache_path(url)
if os.path.exists(cache_fp):
try:
os.remove(cache_fp)
except Exception:
pass
def get_color_from_string(s: str) -> str:
"""Generate a color code from a string."""
hash_code = hash(s)
@@ -264,15 +261,41 @@ def get_color_from_string(s: str) -> str:
def filter_jobs(
jobs: List[Dict[str, Any]],
region: _Optional[str] = None,
keyword: _Optional[str] = None,
region: Optional[str] = None,
keyword: Optional[str] = None,
negative_keywords: Optional[List[str]] = None,
) -> List[Dict[str, Any]]:
"""Filter jobs by optional region and keyword."""
"""Filter jobs by optional region, keyword, and negative keywords."""
filtered = jobs
if region:
filtered = [j for j in filtered if j.get("region") == region]
if keyword:
filtered = [j for j in filtered if j.get("keyword") == keyword]
if negative_keywords:
# Case-insensitive substring check across title, company, location, and
# description. (The scraper flags matches with a stricter word-boundary
# regex; a plain substring test keeps this display-time filter cheap.)
# Normalize negative keywords to lower case
nks = [nk.lower() for nk in negative_keywords if nk]
def is_clean(job):
# Check all fields
text_blob = " ".join([
str(job.get("title") or ""),
str(job.get("company") or ""),
str(job.get("location") or ""),
str(job.get("description") or "")
]).lower()
for nk in nks:
if nk in text_blob:
return False
return True
filtered = [j for j in filtered if is_clean(j)]
return filtered
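A small sketch of the combined filtering; the job dicts are fabricated:
jobs = [
    {"title": "Warehouse Lead", "region": "portland", "keyword": "warehouse",
     "description": "Day shift"},
    {"title": "Sales - commission only", "region": "portland", "keyword": "warehouse",
     "description": ""},
]
filter_jobs(jobs, region="portland", negative_keywords=["commission only"])
# -> only the "Warehouse Lead" entry remains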