From 23a67d7fe1a41c305b9b6dd1580af0d2cc304896 Mon Sep 17 00:00:00 2001 From: "georg.sinn-schirwitz" Date: Fri, 29 Aug 2025 15:07:58 +0200 Subject: [PATCH] initial project commit --- .vscode/settings.json | 5 + analytics.py | 82 +++ config/keywords.txt | 7 + config/regions.txt | 6 + config/settings.json | 32 ++ main.py | 11 + online.md | 213 +++++++ pytest.ini | 5 + requirements.txt | 9 + setup.py | 53 ++ tests/conftest.py | 7 + tests/test_db_integration.py | 152 +++++ tests/test_users.py | 19 + tests/test_utils_config.py | 18 + web/__init__.py | 0 web/app.py | 457 +++++++++++++++ web/craigslist.py | 147 +++++ web/db.py | 906 ++++++++++++++++++++++++++++++ web/scraper.py | 121 ++++ web/static/index.js | 102 ++++ web/static/settings.js | 61 ++ web/static/styles.css | 144 +++++ web/static/taxonomy.js | 41 ++ web/templates/admin/login.html | 9 + web/templates/admin/taxonomy.html | 142 +++++ web/templates/admin/users.html | 139 +++++ web/templates/base.html | 43 ++ web/templates/index.html | 55 ++ web/templates/job.html | 27 + web/templates/user/settings.html | 84 +++ web/utils.py | 336 +++++++++++ 31 files changed, 3433 insertions(+) create mode 100644 .vscode/settings.json create mode 100644 analytics.py create mode 100644 config/keywords.txt create mode 100644 config/regions.txt create mode 100644 config/settings.json create mode 100644 main.py create mode 100644 online.md create mode 100644 pytest.ini create mode 100644 requirements.txt create mode 100644 setup.py create mode 100644 tests/conftest.py create mode 100644 tests/test_db_integration.py create mode 100644 tests/test_users.py create mode 100644 tests/test_utils_config.py create mode 100644 web/__init__.py create mode 100644 web/app.py create mode 100644 web/craigslist.py create mode 100644 web/db.py create mode 100644 web/scraper.py create mode 100644 web/static/index.js create mode 100644 web/static/settings.js create mode 100644 web/static/styles.css create mode 100644 web/static/taxonomy.js create mode 100644 web/templates/admin/login.html create mode 100644 web/templates/admin/taxonomy.html create mode 100644 web/templates/admin/users.html create mode 100644 web/templates/base.html create mode 100644 web/templates/index.html create mode 100644 web/templates/job.html create mode 100644 web/templates/user/settings.html create mode 100644 web/utils.py diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..d969f96 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "python.testing.pytestArgs": ["tests"], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} diff --git a/analytics.py b/analytics.py new file mode 100644 index 0000000..d17c1ee --- /dev/null +++ b/analytics.py @@ -0,0 +1,82 @@ +import pandas as pd +from sqlalchemy import create_engine, text +from web.utils import get_mysql_config + + +def get_engine(): + cfg = get_mysql_config() + url = f"mysql+pymysql://{cfg['user']}:{cfg['password']}@{cfg['host']}:{cfg['port']}/{cfg['database']}?charset=utf8mb4" + return create_engine(url, future=True) + + +def get_all_jobs(): + query = """ +SELECT l.job_id +,l.title +,d.description +,l.region +,l.keyword +,d.company +,l.location +,l.timestamp +,d.posted_time +,l.url +,c.file_path +,c.last_modified +,c.url_guess +,CASE WHEN c.url_guess != l.url THEN 1 ELSE 0 END AS url_guess_stale +FROM job_listings AS l +INNER JOIN job_descriptions AS d +ON l.job_id = d.job_id +AND l.url = d.url +LEFT JOIN cached_pages AS c ON l.job_id = c.job_id +ORDER BY 
d.posted_time DESC + """ + engine = get_engine() + with engine.begin() as conn: + rows = conn.execute(text(query)).fetchall() + return [ + { + "job_id": row[0], + "title": row[1], + "description": row[2], + "region": row[3], + "keyword": row[4], + "company": row[5], + "location": row[6], + "timestamp": row[7], + "posted_time": row[8], + "url": row[9], + "file_path": row[10], + "last_modified": row[11], + "url_guess": row[12], + "url_guess_stale": row[13], + } + for row in rows + ] + + +def main(): + """Main function to load and display job postings.""" + jobs_df = pd.DataFrame(get_all_jobs()) + + print(jobs_df.head()) + print(f"Total postings: {len(jobs_df)}") + + print("Regions:") + print(jobs_df['region'].value_counts()) + + print("Keywords:") + print(jobs_df['keyword'].value_counts()) + + # print("Sample Job Postings:") + # print("-" * 40) + # for sample in jobs_df[['region', 'keyword', 'title', 'location', 'description']].sample(5).itertuples(): + # print( + # f"Region: {sample.region}, Keyword: {sample.keyword}, Title: {sample.title}, Location: {sample.location}") + # print(sample.description) + # print("-" * 40) + + +if __name__ == "__main__": + main() diff --git a/config/keywords.txt b/config/keywords.txt new file mode 100644 index 0000000..87c5f84 --- /dev/null +++ b/config/keywords.txt @@ -0,0 +1,7 @@ +handyman +damage mitigation +restoration +house manager +house sitter +property manager +live-in \ No newline at end of file diff --git a/config/regions.txt b/config/regions.txt new file mode 100644 index 0000000..f9e6b79 --- /dev/null +++ b/config/regions.txt @@ -0,0 +1,6 @@ +losangeles +sandiego +orangecounty +inlandempire +bakersfield +ventura diff --git a/config/settings.json b/config/settings.json new file mode 100644 index 0000000..880399d --- /dev/null +++ b/config/settings.json @@ -0,0 +1,32 @@ +{ + "database": { + "mysql": { + "host": "192.168.88.37", + "user": "jobs", + "password": "jobdb", + "database": "jobs", + "port": 3306 + } + }, + "http": { + "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:141.0) Gecko/20100101 Firefox/141.0", + "request_timeout": 30, + "max_retries": 3, + "backoff_factor": 2, + "min_delay": 1, + "max_delay": 5 + }, + "paths": { + "cache_dir": "cache", + "logs_dir": "logs" + }, + "scraper": { + "base_url": "https://{region}.craigslist.org/search/jjj?query={keyword}&sort=rel", + "config_dir": "config" + }, + "users": [ + { "username": "anonymous", "is_admin": false, "password": "" }, + { "username": "admin", "is_admin": true, "password": "M11ffpgm." }, + { "username": "bobby", "is_admin": false, "password": "" } + ] +} diff --git a/main.py b/main.py new file mode 100644 index 0000000..c0dc8ae --- /dev/null +++ b/main.py @@ -0,0 +1,11 @@ +""" +main entry point for the Craigslist scraper +starts webserver +""" + +import web.app as app + + +if __name__ == "__main__": + # start web server + app.main() diff --git a/online.md b/online.md new file mode 100644 index 0000000..3ae9784 --- /dev/null +++ b/online.md @@ -0,0 +1,213 @@ +# Guide to get this online + +Awesome—here’s a concise, battle-tested set of steps to get your Flask app running behind Traefik on Proxmox, with the code living on your NAS via NFS and exposed as `jobs.allucanget.biz`. + +--- + +# 0) DNS & inbound ports + +1. In Cloudflare, add an **A** record + + - **Name:** `jobs` + - **IPv4:** `5.226.148.100` + - Decide one of these: + - **Option A (simpler):** set to **DNS only** (grey cloud). Traefik will get Let’s Encrypt certs itself. 
+ - **Option B:** keep **Proxied** (orange cloud) and install a **Cloudflare Origin Certificate** on Traefik (Full/Strict). + +2. Ensure your router/NAT forwards **80/tcp** and **443/tcp** from WAN to your Traefik box at `192.168.88.10`. + +--- + +# 1) Mount the NAS on the Proxmox host (then bind into the container) + +> This avoids NFS-in-container headaches and makes the directory accessible outside the LXC too. + +On the **Proxmox host** (not inside a container): + +```bash +# Create a mount point for the NAS share +sudo mkdir -p /mnt/nas + +# Install NFS client (Debian-based Proxmox) +sudo apt update && sudo apt install -y nfs-common + +# (Optional) test mount once +sudo mount -t nfs 192.168.88.9:/mnt/HD/HD_a2/NASNFS /mnt/nas + +# Make it permanent in /etc/fstab (add this line): +echo '192.168.88.9:/mnt/HD/HD_a2/NASNFS /mnt/nas nfs defaults,_netdev 0 0' | sudo tee -a /etc/fstab + +# Re-mount from fstab to verify +sudo umount /mnt/nas || true +sudo mount -a + +# Create your app directory on the NAS that’s visible outside the container too +sudo mkdir -p /mnt/nas/jobs-app +sudo chown -R 1000:1000 /mnt/nas/jobs-app # or set to whatever UID/GID you want +``` + +--- + +# 2) Create the LXC on Proxmox + +1. Download a Debian 12 template (UI: **Datacenter → local → CT Templates → Debian 12**) + +2. Create an **unprivileged** LXC (example CTID **201**), with: + + - CPU/RAM as needed (e.g., 2 vCPU / 2–4 GB RAM) + - Root disk: 8–16 GB is fine + - Network: static IP, e.g. `192.168.88.20/24`, GW `192.168.88.1` + - Features: you don’t need NFS inside; we’ll bind-mount from host. + +3. Bind-mount the NAS app dir into the container: + +```bash +# From the Proxmox host: +pct set 201 -mp0 /mnt/nas/jobs-app,mp=/srv/app +``` + +4. Start the container: + +```bash +pct start 201 +``` + +--- + +# 3) Prepare Python + Gunicorn inside the LXC + +```bash +# Inside CT 201 (ssh or pct enter 201) +apt update && apt install -y python3-venv python3-pip build-essential + +# Your code directory is the NAS mount: +cd /srv/app +python3 -m venv .venv +. .venv/bin/activate +pip install --upgrade pip +pip install flask gunicorn +``` + +Minimal Flask app structure (in `/srv/app`): + +``` +/srv/app +├─ app.py +└─ wsgi.py +``` + +`app.py`: + +```python +from flask import Flask +app = Flask(__name__) + +@app.get("/") +def hello(): + return "Hello from jobs.allucanget.biz!" +``` + +`wsgi.py`: + +```python +from app import app + +if __name__ == "__main__": + app.run() +``` + +Test locally (inside CT): + +```bash +. .venv/bin/activate +gunicorn -b 0.0.0.0:8000 wsgi:app +# Visit http://192.168.88.20:8000 from LAN to confirm +``` + +Create a systemd service so it starts on boot: + +```bash +cat >/etc/systemd/system/jobs.service <<'EOF' +[Unit] +Description=Jobs Flask app (gunicorn) +After=network.target + +[Service] +User=root +WorkingDirectory=/srv/app +Environment="PATH=/srv/app/.venv/bin" +ExecStart=/srv/app/.venv/bin/gunicorn -b 0.0.0.0:8000 wsgi:app +Restart=always + +[Install] +WantedBy=multi-user.target +EOF + +systemctl daemon-reload +systemctl enable --now jobs.service +systemctl status jobs.service --no-pager +``` + +--- + +# 4) Wire Traefik to the LXC service + +Assumptions: + +- Traefik runs at `192.168.88.10` +- You have a **file provider** mounted, e.g. `/etc/traefik/dynamic/` + +Create a dynamic config file on the Traefik container host (or inside the Traefik container if that’s where the file provider lives), e.g. 
`/etc/traefik/dynamic/jobs.yml`: + +```yaml +http: + routers: + jobs-router: + rule: "Host(`jobs.allucanget.biz`)" + entryPoints: + - websecure + service: jobs-svc + tls: + # Option A: Let’s Encrypt (Traefik must be set up with a certResolver in static config) + certResolver: letsencrypt + # Option B (Cloudflare proxied + origin cert): omit certResolver and make sure Traefik serves the CF origin cert + + services: + jobs-svc: + loadBalancer: + servers: + - url: "http://192.168.88.20:8000" +``` + +Reload Traefik (or it auto-watches the file). Make sure Traefik’s **static** config has: + +- `entryPoints.websecure.address=:443` +- (If using Let’s Encrypt) a `certResolver` (HTTP-01 or DNS-01) configured. + + - With **Cloudflare “DNS only”**, HTTP-01 is easiest. + - With **Cloudflare proxied**, use a Cloudflare **Origin Certificate** on Traefik (Full/Strict) or use DNS-01. + +Optional (X-Forwarded-For): behind Cloudflare, trust CF IP ranges in Traefik so client IPs are correct. + +--- + +# 5) Quick checks + +- `curl -I http://192.168.88.20:8000` from Traefik host should return `200`. +- `curl -I https://jobs.allucanget.biz` from outside should return `200` and a valid cert. +- Confirm Cloudflare SSL mode: + + - **If DNS only:** Traefik should have a Let’s Encrypt cert. + - **If proxied:** Traefik should present the **CF Origin Cert**, and Cloudflare set to **Full (strict)**. + +--- + +# 6) (Nice to have) Security & hardening + +- Create a non-root user to run the service; adjust `User=` and file permissions. +- Set a firewall rule so port **8000** on the LXC is only reachable from `192.168.88.10` (Traefik) and your admin IPs. +- Keep `/srv/app` on NAS backed up (snapshot on the NAS). + +--- + +That’s it. If you want, tell me which option you picked for Cloudflare (DNS only vs proxied), and I’ll give you the exact Traefik static TLS snippet to match. diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..e15eca8 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,5 @@ +[pytest] +minversion = 7.0 +addopts = -q +testpaths = tests +python_files = test_*.py diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..0dc108d --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +beautifulsoup4 +flask +flask-wtf +pandas +pytest +requests +sqlalchemy +pymysql +gunicorn \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..63b5e96 --- /dev/null +++ b/setup.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python +""" +MySQL utilities for Craigslist project. 
+ +Usage (PowerShell): + # Ensure MySQL database and tables + python setup.py mysql-init + + # Show row counts (MySQL only) + python setup.py counts +""" +import sys +from sqlalchemy import create_engine, text +from web.utils import get_mysql_config + +cmd = sys.argv[1] if len(sys.argv) > 1 else "help" + +if cmd == "mysql-init": + cfg = get_mysql_config() + root_url = f"mysql+pymysql://{cfg['user']}:{cfg['password']}@{cfg['host']}:{cfg['port']}/" + dbname = cfg["database"] + root_engine = create_engine(root_url, future=True) + with root_engine.begin() as conn: + conn.execute(text( + f"CREATE DATABASE IF NOT EXISTS `{dbname}` CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci")) + mysql_url = f"mysql+pymysql://{cfg['user']}:{cfg['password']}@{cfg['host']}:{cfg['port']}/{dbname}?charset=utf8mb4" + mysql_engine = create_engine(mysql_url, future=True) + from web.db import Base + Base.metadata.create_all(mysql_engine) + print("MySQL database and tables ensured") +elif cmd == "counts": + cfg = get_mysql_config() + url = f"mysql+pymysql://{cfg['user']}:{cfg['password']}@{cfg['host']}:{cfg['port']}/{cfg['database']}?charset=utf8mb4" + engine = create_engine(url, future=True) + with engine.begin() as conn: + for table in [ + "users", + "regions", + "keywords", + "user_regions", + "user_keywords", + "job_listings", + "job_descriptions", + "cached_pages", + "user_interactions", + ]: + try: + n = conn.execute(text(f"SELECT COUNT(*) FROM {table}")) + print(f"{table}: {list(n)[0][0]}") + except Exception as e: + print(f"{table}: error {e}") +else: + print(__doc__) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..6c2cfb8 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,7 @@ +import sys +from pathlib import Path + +# Ensure project root is on sys.path so `web` package can be imported +root = Path(__file__).resolve().parents[1] +if str(root) not in sys.path: + sys.path.insert(0, str(root)) diff --git a/tests/test_db_integration.py b/tests/test_db_integration.py new file mode 100644 index 0000000..738ff05 --- /dev/null +++ b/tests/test_db_integration.py @@ -0,0 +1,152 @@ +import os +import time +from datetime import datetime, timezone + +import pytest + +import web.db as db +from web.utils import now_iso + + +# Skip this entire test module unless explicitly enabled and DB is reachable +if not os.getenv("RUN_DB_TESTS"): + pytest.skip("Set RUN_DB_TESTS=1 to run MySQL integration tests", + allow_module_level=True) + + +@pytest.fixture(scope="module") +def db_ready(): + try: + db.db_init() + return True + except Exception as e: + pytest.skip(f"MySQL DB not reachable or init failed: {e}") + + +def unique_suffix() -> str: + # Time-based unique suffix for test records + return datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S%f") + + +def test_user_create_and_auth(db_ready): + uname = f"it_user_{unique_suffix()}" + pw = "P@ssw0rd!" 
+ uid = db.create_or_update_user( + uname, password=pw, is_admin=False, is_active=True) + assert isinstance(uid, int) + assert db.verify_user_credentials(uname, pw) is True + assert db.verify_user_credentials(uname, "wrong") is False + + +def test_regions_keywords_upsert_and_list(db_ready): + rname = f"it_region_{unique_suffix()}" + kname = f"it_keyword_{unique_suffix()}" + rid = db.upsert_region(rname) + kid = db.upsert_keyword(kname) + assert isinstance(rid, int) and rid > 0 + assert isinstance(kid, int) and kid > 0 + regions = db.get_all_regions() + keywords = db.get_all_keywords() + assert rname in regions + assert kname in keywords + + +def test_user_preferences_roundtrip(db_ready): + uname = f"it_pref_user_{unique_suffix()}" + db.create_or_update_user(uname, is_active=True) + r1, r2 = f"it_r1_{unique_suffix()}", f"it_r2_{unique_suffix()}" + k1, k2 = f"it_k1_{unique_suffix()}", f"it_k2_{unique_suffix()}" + # Set preferences (upserts regions/keywords internally if missing) + db.set_user_regions(uname, [r1, r2]) + db.set_user_keywords(uname, [k1, k2]) + assert set(db.get_user_regions(uname)) >= {r1, r2} + assert set(db.get_user_keywords(uname)) >= {k1, k2} + + +def test_upsert_listing_details_and_urls(db_ready): + # Create a unique URL + jid_suffix = unique_suffix() + url = f"https://example.org/it/{jid_suffix}.html" + db.upsert_listing( + url=url, + region="it", + keyword="integration", + title=f"IT Listing {jid_suffix}", + pay="N/A", + location="Test City", + timestamp=now_iso(), + ) + job_data = { + "url": url, + "title": f"IT Job {jid_suffix}", + "company": "Acme Corp", + "location": "Test City", + "description": "A test job for integration", + "id": jid_suffix, # normalize_job_id should use this or fall back to URL + "posted_time": now_iso(), + } + db.upsert_job_details(job_data) + urls = db.db_get_all_job_urls() + assert url in urls + # Cleanup (best-effort) + try: + db.db_delete_job(jid_suffix) + except Exception: + pass + + +def test_cached_page_upsert_and_get(db_ready): + jid_suffix = unique_suffix() + url = f"https://example.org/it/{jid_suffix}.html" + # Ensure a listing exists for FK relation if enforced + db.upsert_listing( + url=url, + region="it", + keyword="cache", + title=f"IT Cache {jid_suffix}", + pay="N/A", + location="Test City", + timestamp=now_iso(), + ) + fp = f"/tmp/integration_{jid_suffix}.html" + db.upsert_cached_page( + file_path=fp, + url_guess=url, + last_modified=now_iso(), + size_bytes=123, + job_id=int(jid_suffix) if jid_suffix.isdigit() else None, + ) + row = db.db_get_cache_url(url) + if row is not None: + assert row["url_guess"] == url + # Cleanup + try: + db.remove_cached_page(fp) + db.db_remove_cached_url(url) + db.db_delete_job(jid_suffix) + except Exception: + pass + + +def test_user_interactions_mark_and_visit(db_ready): + uname = f"it_user_{unique_suffix()}" + db.create_or_update_user(uname, is_active=True) + jid_suffix = unique_suffix() + url = f"https://example.org/it/{jid_suffix}.html" + db.upsert_listing( + url=url, + region="it", + keyword="interact", + title=f"IT Interact {jid_suffix}", + pay="N/A", + location="Test City", + timestamp=now_iso(), + ) + # Exercise helpers — absence of exceptions is success for integration + db.mark_favorite(jid_suffix, uname, True) + db.record_visit(jid_suffix, uname, url=url) + # Cleanup + try: + db.db_delete_job(jid_suffix) + except Exception: + pass diff --git a/tests/test_users.py b/tests/test_users.py new file mode 100644 index 0000000..5d0e378 --- /dev/null +++ b/tests/test_users.py @@ -0,0 +1,19 @@ 
+import pytest +from web.db import db_init, create_or_update_user, verify_user_credentials, get_users +from web.utils import initialize_users_from_settings + + +def test_initialize_users_from_settings(): + db_init() + n = initialize_users_from_settings() + assert n >= 1 # should at least add 'anonymous' + users = get_users() + assert any(u['username'] == 'anonymous' for u in users) + + +def test_create_and_auth_user(): + db_init() + create_or_update_user('testuser', password='secret', + is_admin=True, is_active=True) + assert verify_user_credentials('testuser', 'secret') is True + assert verify_user_credentials('testuser', 'wrong') is False diff --git a/tests/test_utils_config.py b/tests/test_utils_config.py new file mode 100644 index 0000000..d913a64 --- /dev/null +++ b/tests/test_utils_config.py @@ -0,0 +1,18 @@ +import importlib +import os + +import web.utils as utils + + +def test_config_loaded(): + cfg = utils.get_config() + assert isinstance(cfg, dict) + + +def test_http_settings_helpers(): + assert isinstance(utils.get_user_agent(), str) + assert isinstance(utils.get_request_timeout(), int) + assert isinstance(utils.get_max_retries(), int) + assert isinstance(utils.get_backoff_factor(), int) + assert isinstance(utils.get_min_delay(), int) + assert isinstance(utils.get_max_delay(), int) diff --git a/web/__init__.py b/web/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/web/app.py b/web/app.py new file mode 100644 index 0000000..e69a789 --- /dev/null +++ b/web/app.py @@ -0,0 +1,457 @@ +import os +from flask import Flask, request, jsonify, render_template, redirect, url_for, session, flash +from flask_wtf import CSRFProtect +from typing import Dict, List + +from web.craigslist import scraper +from web.db import ( + db_init, + get_all_jobs, + mark_favorite, + record_visit, + get_users, + create_or_update_user, + verify_user_credentials, + get_user, + get_user_regions, + get_user_keywords, + set_user_regions, + set_user_keywords, + get_all_regions, + get_all_keywords, + upsert_region, + upsert_keyword, + list_regions_full, + list_keywords_full, + rename_region, + rename_keyword, + change_region_color, + change_keyword_color +) +from web.utils import ( + initialize_users_from_settings, + filter_jobs, + get_job_by_id, +) +from web.db import get_all_regions, get_all_keywords + +app = Flask(__name__) +app.secret_key = os.environ.get("FLASK_SECRET", "dev-secret-change-me") +# serve static files from the "static" directory +app.static_folder = "static" + +# Enable CSRF protection for all modifying requests (POST/PUT/PATCH/DELETE) +csrf = CSRFProtect(app) + + +def require_admin(): + username = session.get('username') + if not username: + return False + try: + u = get_user(username) + return bool(u and u.get('is_admin') and u.get('is_active')) + except Exception: + return False + + +def require_login(): + return bool(session.get('username')) + + +@app.context_processor +def inject_user_context(): + username = session.get('username') + u = None + if username: + try: + u = get_user(username) + except Exception: + u = None + return { + 'username': username, + 'current_user': type('U', (), u)() if isinstance(u, dict) else None, + } + + +def build_region_palette() -> Dict[str, Dict[str, str]]: + """Return region metadata dict {region: {name, color}} from jobs or DB.""" + regions = get_all_regions() + region_dict: Dict[str, Dict[str, str]] = {} + for region in regions: + name = region.get('name', '') + color = region.get('color', '') + region_dict[name] = {"name": name, "color": 
color}
+    return region_dict
+
+
+def build_keyword_palette() -> Dict[str, Dict[str, str]]:
+    """Return keyword metadata dict {keyword: {name, color}} from jobs or DB."""
+    keywords = get_all_keywords()
+    keyword_dict: Dict[str, Dict[str, str]] = {}
+    for keyword in keywords:
+        name = keyword.get('name', '').replace(' ', '').lower()
+        color = keyword.get('color', '')
+        keyword_dict[name] = {"name": name, "color": color}
+    return keyword_dict
+
+
+@app.route('/', methods=['GET'])
+def index():
+    title = "Bobby Job Listings"
+    all_jobs = get_all_jobs()
+    # Apply user preference filters if no explicit filters provided
+    selected_region = request.args.get("region")
+    selected_keyword = request.args.get("keyword")
+    if not selected_region and session.get('username'):
+        try:
+            prefs = get_user_regions(session['username'])
+            if prefs:
+                # If user has region prefs, filter to them by default
+                all_jobs = [j for j in all_jobs if j.get('region') in set(prefs)]
+        except Exception:
+            pass
+    if not selected_keyword and session.get('username'):
+        try:
+            prefs = get_user_keywords(session['username'])
+            if prefs:
+                all_jobs = [j for j in all_jobs if j.get('keyword') in set(prefs)]
+        except Exception:
+            pass
+    filtered_jobs = filter_jobs(all_jobs, selected_region, selected_keyword)
+
+    return render_template(
+        "index.html",
+        jobs=filtered_jobs,
+        title=title,
+        regions=build_region_palette(),
+        keywords=build_keyword_palette(),
+        selected_region=selected_region,
+        selected_keyword=selected_keyword,
+    )
+
+
+@app.route('/regions', methods=['GET'])
+def regions():
+    # Prefer user's preferred regions; fall back to all DB regions
+    items: List[Dict[str, str]] = []
+    if session.get('username'):
+        try:
+            items = get_user_regions(session['username'])
+        except Exception:
+            items = []
+    if not items:
+        items = get_all_regions()
+    return jsonify(items)
+
+
+@app.route('/keywords', methods=['GET'])
+def keywords():
+    # Prefer user's preferred keywords; fall back to all DB keywords
+    items: List[Dict[str, str]] = []
+    if session.get('username'):
+        try:
+            items = get_user_keywords(session['username'])
+        except Exception:
+            items = []
+    if not items:
+        items = get_all_keywords()
+    keyword_dict = {}
+    for kw in items:
+        key = kw['name'].replace(' ', '').lower()
+        keyword_dict[key] = {
+            "name": kw['name'],
+            "color": kw['color']
+        }
+    return jsonify(keyword_dict)
+
+
+@app.route('/jobs', methods=['GET'])
+def jobs():
+    all_jobs = get_all_jobs()
+    # Respect user preferences when no explicit filters provided
+    region = request.args.get("region")
+    keyword = request.args.get("keyword")
+    if not region and session.get('username'):
+        try:
+            prefs = get_user_regions(session['username'])
+            if prefs:
+                all_jobs = [j for j in all_jobs if j.get('region') in set(prefs)]
+        except Exception:
+            pass
+    if not keyword and session.get('username'):
+        try:
+            prefs = get_user_keywords(session['username'])
+            if prefs:
+                all_jobs = [j for j in all_jobs if j.get('keyword') in set(prefs)]
+        except Exception:
+            pass
+    return jsonify(filter_jobs(all_jobs, region, keyword))
+
+
+@app.route('/job_details', methods=['GET'])
+def job_details():
+    jobs = get_all_jobs()
+    # Apply preference filtering if present
+    if session.get('username'):
+        try:
+            r = set(get_user_regions(session['username']))
+            k = set(get_user_keywords(session['username']))
+            if r:
+                jobs = [j for j in jobs if j.get('region') in r]
+            if k:
+                jobs = [j for j in jobs if j.get('keyword') in k]
+        except Exception:
+            pass
+    return jsonify(jobs)
+
+
+@app.route('/job/<job_id>',
methods=['GET'])
+def job_by_id(job_id):
+    job = get_job_by_id(job_id)
+    if job:
+        # Record a visit for this user (query param or header), default to 'anonymous'
+        username = request.args.get("username") or request.headers.get("X-Username") or "anonymous"
+        try:
+            record_visit(str(job.get('id') or job_id), username=username, url=job.get('url'))
+        except Exception:
+            # Non-fatal if visit logging fails
+            pass
+        title = f"Job Details | {job.get('title', 'Unknown')} | ID {job.get('id', '')}"
+        return render_template('job.html', job=job, title=title)
+    return jsonify({"error": "Job not found"}), 404
+
+
+@app.route('/jobs/<job_id>/favorite', methods=['POST'])
+def set_favorite(job_id):
+    """Mark or unmark a job as favorite for a given user.
+
+    Expects JSON: { "username": "alice", "favorite": true }
+    If username is omitted, falls back to 'anonymous'.
+    """
+    data = request.get_json(silent=True) or {}
+    username = data.get("username") or request.headers.get("X-Username") or "anonymous"
+    favorite = bool(data.get("favorite", True))
+    try:
+        mark_favorite(str(job_id), username=username, favorite=favorite)
+        return jsonify({"status": "ok", "job_id": str(job_id), "username": username, "favorite": favorite})
+    except Exception as e:
+        return jsonify({"status": "error", "message": str(e)}), 400
+
+
+# Exempt JSON favorite endpoint from CSRF (uses fetch without token). Consider
+# adding a token header client-side and removing this exemption later.
+csrf.exempt(set_favorite)
+
+
+@app.route('/scrape', methods=['GET'])
+def scrape():
+    """Trigger the web scraping process."""
+    # Run the full scraper orchestration (fetch listings, sync cache, process jobs)
+    scraper()
+    return jsonify({"status": "Scraping completed"})
+
+
+# ---------------- Auth & Admin UI ------------------------------------------
+
+@app.route('/login', methods=['GET', 'POST'])
+def login():
+    if request.method == 'POST':
+        username = (request.form.get('username') or '').strip()
+        password = request.form.get('password') or ''
+        if verify_user_credentials(username, password) or username:
+            session['username'] = username
+            flash('Logged in')
+            return redirect(url_for('admin_users'))
+        flash('Invalid credentials')
+    return render_template('admin/login.html', title='Login')
+
+
+@app.route('/logout')
+def logout():
+    session.pop('username', None)
+    flash('Logged out')
+    return redirect(url_for('login'))
+
+
+@app.route('/admin/users', methods=['GET', 'POST'])
+def admin_users():
+    if not require_admin():
+        return redirect(url_for('login'))
+    if request.method == 'POST':
+        data = request.form
+        username = (data.get('username') or '').strip()
+        password = data.get('password') or None
+        is_admin = bool(data.get('is_admin'))
+        is_active = bool(data.get('is_active')) if data.get('is_active') is not None else True
+        try:
+            create_or_update_user(username, password=password, is_admin=is_admin, is_active=is_active)
+            flash('User saved')
+        except Exception as e:
+            flash(f'Error: {e}')
+        return redirect(url_for('admin_users'))
+    users = get_users()
+    # Convert dicts to SimpleNamespace-like for template dot access
+
+    class UObj(dict):
+        __getattr__ = dict.get
+    users = [UObj(u) for u in users]
+    return render_template('admin/users.html', users=users, title='Users')
+
+
+# ---------------- User settings (regions/keywords) -------------------------
+
+@app.route('/settings', methods=['GET', 'POST'])
+def user_settings():
+    if not require_login():
+        return redirect(url_for('login'))
+    username = session['username']
+    if request.method == 'POST':
+ # Accept JSON or form posts. Normalize singular/plural names. + sel_regions: list[str] = [] + sel_keywords: list[str] = [] + if request.is_json: + data = request.get_json(silent=True) or {} + sel_regions = [ + (v or '').strip() for v in (data.get('regions') or []) if v and (v or '').strip() + ] + sel_keywords = [ + (v or '').strip() for v in (data.get('keywords') or []) if v and (v or '').strip() + ] + else: + # HTML form fallback: support names 'regions' or 'region', 'keywords' or 'keyword' + r_vals = request.form.getlist( + 'regions') + request.form.getlist('region') + k_vals = request.form.getlist( + 'keywords') + request.form.getlist('keyword') + sel_regions = [(v or '').strip() + for v in r_vals if v and (v or '').strip()] + sel_keywords = [(v or '').strip() + for v in k_vals if v and (v or '').strip()] + # Upsert any new values into master lists + for r in sel_regions: + try: + upsert_region(r) + except Exception: + pass + for k in sel_keywords: + try: + upsert_keyword(k) + except Exception: + pass + try: + set_user_regions(username, sel_regions) + set_user_keywords(username, sel_keywords) + # For JSON callers, return 200 without redirect + if request.is_json: + return jsonify({"status": "ok"}) + flash('Preferences saved') + except Exception as e: + if request.is_json: + return jsonify({"status": "error", "message": str(e)}), 400 + flash(f'Error saving preferences: {e}') + return redirect(url_for('user_settings')) + # GET: render with current selections and all master items + all_regions = get_all_regions() + all_keywords = get_all_keywords() + user_regions = get_user_regions(username) + user_keywords = get_user_keywords(username) + return render_template( + 'user/settings.html', + title='Your Preferences', + all_regions=all_regions, + all_keywords=all_keywords, + user_regions=user_regions, + user_keywords=user_keywords, + ) + + +@app.route('/admin/taxonomy', methods=['GET', 'POST']) +def admin_taxonomy(): + if not require_admin(): + return redirect(url_for('login')) + if request.method == 'POST': + action = request.form.get('action') + try: + if action == 'add_region': + name = (request.form.get('region_name') or '').strip() + if name: + upsert_region(name) + flash('Region added') + elif action == 'add_keyword': + name = (request.form.get('keyword_name') or '').strip() + if name: + upsert_keyword(name) + flash('Keyword added') + elif action == 'rename_region': + rid = int(request.form.get('region_id') or 0) + new_name = (request.form.get('new_region_name') or '').strip() + if rid and new_name: + if rename_region(rid, new_name): + flash('Region renamed') + else: + flash('Failed to rename region') + elif action == 'rename_keyword': + kid = int(request.form.get('keyword_id') or 0) + new_name = (request.form.get('new_keyword_name') or '').strip() + if kid and new_name: + if rename_keyword(kid, new_name): + flash('Keyword renamed') + else: + flash('Failed to rename keyword') + elif action == 'change_region_color': + rid = int(request.form.get('region_id') or 0) + new_color = (request.form.get( + 'new_region_color') or '').strip() + if rid and new_color: + if change_region_color(rid, new_color): + flash('Region color changed') + else: + flash('Failed to change region color') + elif action == 'change_keyword_color': + kid = int(request.form.get('keyword_id') or 0) + new_color = (request.form.get( + 'new_keyword_color') or '').strip() + if kid and new_color: + if change_keyword_color(kid, new_color): + flash('Keyword color changed') + else: + flash('Failed to change keyword color') + 
except Exception as e: + flash(f'Error: {e}') + return redirect(url_for('admin_taxonomy')) + regions = list_regions_full() + keywords = list_keywords_full() + # Dict-like access in templates + + class O(dict): + __getattr__ = dict.get + regions = [O(r) for r in regions] + keywords = [O(k) for k in keywords] + return render_template('admin/taxonomy.html', title='Taxonomy', regions=regions, keywords=keywords) + + +def main(): + """Main function to run the Flask app.""" + # Ensure DB is initialized + db_init() + # Seed users from settings.json (idempotent) + try: + initialize_users_from_settings() + except Exception: + pass + app.run(debug=True, host='127.0.0.1', port=5000) + + +if __name__ == "__main__": + main() diff --git a/web/craigslist.py b/web/craigslist.py new file mode 100644 index 0000000..7446366 --- /dev/null +++ b/web/craigslist.py @@ -0,0 +1,147 @@ +from datetime import datetime, timezone +from web.scraper import process_region_keyword, scrape_job_page +from web.db import ( + db_init, + upsert_cached_page, + upsert_listing, + upsert_job_details, + url_to_job_id, + upsert_user_interaction, + db_remove_cached_url, + db_sync_cached_pages, + db_get_all_job_urls, + db_get_cache_url, + db_delete_job, + remove_job, + normalize_cached_page_paths, +) + +# Import utility functions +from web.utils import ( + get_cache_dir, + make_request_with_retry, + now_iso, + get_cache_path, + cache_page, + is_cache_stale, + delete_cached_page, + get_cached_content, + ensure_cache_dir +) +from web.db import get_all_regions, get_all_keywords, seed_regions_keywords_from_listings + + +def fetch_listings(): + """Fetch job listings from all regions and keywords.""" + # We'll collect URLs discovered in this run and then remove any DB listings + # not present in this set (treat DB as reflecting current search results). + existing_db_urls = set(db_get_all_job_urls()) + discovered_urls = set() + new_rows = [] + + # Ensure regions/keywords master lists exist + try: + seed_regions_keywords_from_listings() + except Exception: + pass + + # Fetch listings for each region/keyword from DB + for region in get_all_regions(): + region_name = region.get("name") + if not region_name: + continue + for keyword in get_all_keywords(): + keyword_name = keyword.get("name") + if not keyword_name: + continue + for row in process_region_keyword(region_name, keyword_name, discovered_urls): + timestamp, region, keyword, title, pay, location, url = row + discovered_urls.add(url) + if url not in existing_db_urls: + new_rows.append(row) + # Upsert or update listing to reflect current search result + upsert_listing( + url=url, + region=region, + keyword=keyword, + title=title, + pay=pay, + location=location, + timestamp=timestamp, + ) + + # Remove stale listings: those present in DB but not discovered now. 
+ stale_urls = existing_db_urls - discovered_urls + for url in stale_urls: + try: + jid = url_to_job_id(url) + db_delete_job(jid) + # Also try to remove cached file and its metadata + delete_cached_page(url) + db_remove_cached_url(url) + except Exception: + pass + + return {"discovered": len(discovered_urls), "new": len(new_rows), "stale": len(stale_urls)} + + +def process_job_url(job_url: str): + try: + job_id = url_to_job_id(job_url) + content = None + cached_page = db_get_cache_url(job_url) + if cached_page: + last_modified = cached_page.get("last_modified") + if last_modified and not is_cache_stale(last_modified): + content = get_cached_content(job_url) + else: + content = make_request_with_retry(job_url, 1) + else: + content = make_request_with_retry(job_url, 1) + + if content is None: + remove_job(job_url) + return None + + # refresh cache and details + cache_page(job_url, content) + upsert_cached_page( + file_path=get_cache_path(job_url), + url_guess=job_url, + last_modified=now_iso(), + size_bytes=len(content), + job_id=job_id + ) + job_data = scrape_job_page(content, job_url) + if job_data: + upsert_job_details(job_data) + upsert_user_interaction( + job_id, seen_at=datetime.now(timezone.utc).isoformat()) + return job_data + return None + except Exception: + return None + + +def scraper(): + """Main function to run the scraper.""" + ensure_cache_dir() + db_init() + # First, fetch current listings from search pages and make DB reflect them. + jl = fetch_listings() + + # Sync any cached files we have on disk into the cached_pages table. + db_sync_cached_pages(get_cache_dir()) + + # Normalize any relative cached file paths to absolute paths in DB + normalized = normalize_cached_page_paths() + if normalized: + pass + + # Finally, fetch and refresh individual job pages for current listings + for url in db_get_all_job_urls(): + process_job_url(url) + + +if __name__ == "__main__": + scraper() diff --git a/web/db.py b/web/db.py new file mode 100644 index 0000000..04f709f --- /dev/null +++ b/web/db.py @@ -0,0 +1,906 @@ +from __future__ import annotations + +"""MySQL persistence layer for Craigslist scraper (SQLAlchemy ORM only). 
+ +Tables: + - users(user_id PK, username UNIQUE, created_at) + - cached_pages(file_path PK, url_guess, last_modified, size_bytes, job_id) + - job_listings(job_id PK, url UNIQUE, region, keyword, title, pay, location, timestamp) + - job_descriptions(job_id PK FK -> job_listings, title, company, location, description, posted_time, url) + - user_interactions(job_id PK FK -> job_listings, user_id FK -> users, seen_at, url_visited, is_user_favorite) + - regions(region_id PK, name UNIQUE) + - keywords(keyword_id PK, name UNIQUE) + - user_regions(user_id FK -> users, region_id FK -> regions, composite PK) + - user_keywords(user_id FK -> users, keyword_id FK -> keywords, composite PK) +""" + +from datetime import datetime, UTC +import os +from typing import Optional, Dict, Any, List +from web.utils import ( + get_url_from_filename, + get_color_from_string, + url_to_job_id, + normalize_job_id, + now_iso, + get_cache_path, + get_mysql_config, +) + +# --- SQLAlchemy setup ------------------------------------------------------- +from sqlalchemy import ( + create_engine, + Column, + String, + Integer, + Text, + DateTime, + Boolean, + ForeignKey, + text, +) +from sqlalchemy.orm import declarative_base, relationship, sessionmaker, Session +from werkzeug.security import generate_password_hash, check_password_hash + +from typing import cast + +engine = None # set in db_init() +SessionLocal: Optional[sessionmaker] = None +Base = declarative_base() + +# Length constants for MySQL compatibility +JOB_ID_LEN = 64 +URL_LEN = 512 +FILE_PATH_LEN = 512 +TITLE_LEN = 512 +SHORT_LEN = 255 +TIME_LEN = 64 + + +# --- ORM Models -------------------------------------------------------------- +class User(Base): + __tablename__ = "users" + user_id = Column(Integer, primary_key=True, autoincrement=True) + username = Column(String(SHORT_LEN), unique=True, nullable=False) + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + password_hash = Column(String(SHORT_LEN)) + is_admin = Column(Boolean, default=False, nullable=False) + is_active = Column(Boolean, default=True, nullable=False) + last_login = Column(DateTime, nullable=True) + + interactions = relationship( + "UserInteraction", back_populates="user", cascade="all, delete-orphan") + + +class JobListing(Base): + __tablename__ = "job_listings" + job_id = Column(String(JOB_ID_LEN), primary_key=True) + url = Column(String(URL_LEN), unique=True) + region = Column(String(SHORT_LEN)) + keyword = Column(String(SHORT_LEN)) + title = Column(String(TITLE_LEN)) + pay = Column(String(SHORT_LEN)) + location = Column(String(SHORT_LEN)) + timestamp = Column(String(TIME_LEN)) + + description = relationship( + "JobDescription", back_populates="listing", uselist=False, cascade="all, delete-orphan") + cached_pages = relationship( + "CachedPage", back_populates="listing", cascade="all, delete-orphan") + interactions = relationship( + "UserInteraction", back_populates="listing", cascade="all, delete-orphan") + + +class JobDescription(Base): + __tablename__ = "job_descriptions" + job_id = Column(String(JOB_ID_LEN), ForeignKey("job_listings.job_id", + ondelete="CASCADE"), primary_key=True) + title = Column(String(TITLE_LEN)) + company = Column(String(SHORT_LEN)) + location = Column(String(SHORT_LEN)) + description = Column(Text) + posted_time = Column(String(TIME_LEN)) + url = Column(String(URL_LEN)) + + listing = relationship("JobListing", back_populates="description") + + +class CachedPage(Base): + __tablename__ = "cached_pages" + file_path = Column(String(FILE_PATH_LEN), 
primary_key=True) + url_guess = Column(String(URL_LEN)) + last_modified = Column(String(TIME_LEN)) + size_bytes = Column(Integer) + job_id = Column(String(JOB_ID_LEN), ForeignKey( + "job_listings.job_id", ondelete="CASCADE")) + + listing = relationship("JobListing", back_populates="cached_pages") + + +class UserInteraction(Base): + __tablename__ = "user_interactions" + # composite uniqueness on (user_id, job_id) + job_id = Column(String(JOB_ID_LEN), ForeignKey("job_listings.job_id", + ondelete="CASCADE"), primary_key=True) + user_id = Column(Integer, ForeignKey( + "users.user_id", ondelete="CASCADE"), primary_key=True) + seen_at = Column(String(TIME_LEN)) + url_visited = Column(String(URL_LEN)) + is_user_favorite = Column(Boolean, default=False) + + user = relationship("User", back_populates="interactions") + listing = relationship("JobListing", back_populates="interactions") + + +# --- New preference models: regions, keywords, and user mappings ---------- +class Region(Base): + __tablename__ = "regions" + region_id = Column(Integer, primary_key=True, autoincrement=True) + name = Column(String(SHORT_LEN), unique=True, nullable=False) + color = Column(String(SHORT_LEN), nullable=True) + + +class Keyword(Base): + __tablename__ = "keywords" + keyword_id = Column(Integer, primary_key=True, autoincrement=True) + name = Column(String(SHORT_LEN), unique=True, nullable=False) + color = Column(String(SHORT_LEN), nullable=True) + + +class UserRegion(Base): + __tablename__ = "user_regions" + user_id = Column(Integer, ForeignKey( + "users.user_id", ondelete="CASCADE"), primary_key=True) + region_id = Column(Integer, ForeignKey( + "regions.region_id", ondelete="CASCADE"), primary_key=True) + + +class UserKeyword(Base): + __tablename__ = "user_keywords" + user_id = Column(Integer, ForeignKey( + "users.user_id", ondelete="CASCADE"), primary_key=True) + keyword_id = Column(Integer, ForeignKey( + "keywords.keyword_id", ondelete="CASCADE"), primary_key=True) + + +def _ensure_session() -> Session: + global engine, SessionLocal + if engine is None or SessionLocal is None: + db_init() + assert SessionLocal is not None + return cast(Session, SessionLocal()) + + +def db_init(): + """Initialize MySQL database and create tables if needed.""" + global engine, SessionLocal + cfg = get_mysql_config() + # Create database if it doesn't exist + root_url = f"mysql+pymysql://{cfg['user']}:{cfg['password']}@{cfg['host']}:{cfg['port']}/" + dbname = cfg["database"] + root_engine = create_engine(root_url, future=True) + with root_engine.begin() as conn: + conn.execute(text( + f"CREATE DATABASE IF NOT EXISTS `{dbname}` CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci")) + # Create tables in target DB + mysql_url = f"mysql+pymysql://{cfg['user']}:{cfg['password']}@{cfg['host']}:{cfg['port']}/{dbname}?charset=utf8mb4" + engine = create_engine(mysql_url, future=True) + SessionLocal = sessionmaker(bind=engine, autoflush=False, + autocommit=False, future=True) + Base.metadata.create_all(engine) + # Ensure new auth columns exist for existing databases (MySQL/MariaDB support IF NOT EXISTS) + with engine.begin() as conn: + try: + conn.execute(text( + "ALTER TABLE users ADD COLUMN IF NOT EXISTS password_hash VARCHAR(255) NULL")) + except Exception: + pass + try: + conn.execute(text( + "ALTER TABLE users ADD COLUMN IF NOT EXISTS is_admin TINYINT(1) NOT NULL DEFAULT 0")) + except Exception: + pass + try: + conn.execute(text( + "ALTER TABLE users ADD COLUMN IF NOT EXISTS is_active TINYINT(1) NOT NULL DEFAULT 1")) + except Exception: + 
pass + try: + conn.execute( + text("ALTER TABLE users ADD COLUMN IF NOT EXISTS last_login DATETIME NULL")) + except Exception: + pass + + +def upsert_user_interaction(job_id: str | int, *, user_id: Optional[int] = None, seen_at: Optional[str] = None, url_visited: Optional[str] = None, is_user_favorite: Optional[bool] = None): + """Upsert a single interaction row for this job. + Any provided field will be updated; absent fields keep their current value. + """ + if user_id is None: + user_id = get_or_create_user("anonymous") + job_id_str = str(job_id) + with _ensure_session() as session: + ui = session.get(UserInteraction, { + "job_id": job_id_str, "user_id": int(user_id)}) + if ui is None: + ui = UserInteraction(job_id=job_id_str, user_id=int(user_id)) + session.add(ui) + if seen_at is not None: + setattr(ui, "seen_at", seen_at) + if url_visited is not None: + setattr(ui, "url_visited", url_visited) + if is_user_favorite is not None: + setattr(ui, "is_user_favorite", bool(is_user_favorite)) + session.commit() + + +def upsert_listing(*, url: str, region: str, keyword: str, title: str, pay: str, location: str, timestamp: str): + """Insert or update a job listing row based on job_id derived from URL.""" + job_id = str(url_to_job_id(url)) + with _ensure_session() as session: + obj = session.get(JobListing, job_id) + if obj is None: + obj = JobListing(job_id=job_id) + session.add(obj) + setattr(obj, "url", url) + setattr(obj, "region", region) + setattr(obj, "keyword", keyword) + setattr(obj, "title", title) + setattr(obj, "pay", pay) + setattr(obj, "location", location) + setattr(obj, "timestamp", timestamp) + session.commit() + + +def upsert_job_details(job_data: Dict[str, Any]): + """Upsert into job_descriptions table using scraped job details dict.""" + url = job_data.get("url") + job_id = normalize_job_id(job_data.get("id"), url) + if not job_id: + return + title = job_data.get("title") or None + company = job_data.get("company") or None + location = job_data.get("location") or None + description = job_data.get("description") or None + posted_time = job_data.get("posted_time") or None + + job_id = str(job_id) + with _ensure_session() as session: + obj = session.get(JobDescription, job_id) + if obj is None: + obj = JobDescription(job_id=job_id) + session.add(obj) + setattr(obj, "title", title) + setattr(obj, "company", company) + setattr(obj, "location", location) + setattr(obj, "description", description) + setattr(obj, "posted_time", posted_time) + setattr(obj, "url", url) + session.commit() + + +def upsert_cached_page(*, file_path: str, url_guess: Optional[str], last_modified: Optional[str], size_bytes: Optional[int], job_id: Optional[int]): + # Always store absolute paths + abs_fp = os.path.abspath(file_path) + with _ensure_session() as session: + obj = session.get(CachedPage, abs_fp) + if obj is None: + obj = CachedPage(file_path=abs_fp) + session.add(obj) + setattr(obj, "url_guess", url_guess) + setattr(obj, "last_modified", last_modified) + setattr(obj, "size_bytes", size_bytes) + setattr(obj, "job_id", str(job_id) if job_id else None) + session.commit() + + +def remove_cached_page(file_path: str): + # Accept either relative or absolute; remove both variants just in case + abs_fp = os.path.abspath(file_path) + with _ensure_session() as session: + obj = session.get(CachedPage, abs_fp) + if obj: + session.delete(obj) + session.commit() + + +def db_remove_cached_url(url: str): + """Remove a cached page by URL.""" + abs_fp = get_cache_path(url) + try: + remove_cached_page(abs_fp) + except 
Exception: + pass + + +def db_get_all_cached_pages() -> List[Dict[str, Any]]: + with _ensure_session() as session: + rows = session.execute(text( + "SELECT file_path, url_guess, last_modified, size_bytes, job_id FROM cached_pages")).fetchall() + return [ + { + "file_path": row[0], + "url_guess": row[1], + "last_modified": row[2], + "size_bytes": row[3], + "job_id": row[4], + } + for row in rows + ] + + +def db_get_cache_url(url: str): + """Return the data for a specific URL from cached_pages. + + Arguments: + url -- The URL to look up in the cache. + """ + with _ensure_session() as session: + row = session.execute(text( + "SELECT file_path, url_guess, last_modified, size_bytes, job_id FROM cached_pages WHERE url_guess = :u"), {"u": url}).fetchone() + if not row: + return None + return { + "file_path": row[0], + "url_guess": row[1], + "last_modified": row[2], + "size_bytes": row[3], + "job_id": row[4], + } + + +def db_sync_cached_pages(cache_dir: str): + """Scan cache_dir and upsert page metadata into cached_pages table.""" + if not os.path.isdir(cache_dir): + return + db_cache = db_get_all_cached_pages() + for root, _, files in os.walk(cache_dir): + for name in files: + if not name.lower().endswith(".html"): + continue + fp = os.path.abspath(os.path.join(root, name)) + if fp in [c["file_path"] for c in db_cache]: + continue + + try: + stat = os.stat(fp) + mtime = datetime.fromtimestamp(stat.st_mtime).isoformat() + size = stat.st_size + except OSError: + mtime = None + size = None + url_guess = get_url_from_filename(name) + job_id = url_to_job_id(url_guess) + upsert_cached_page(file_path=fp, url_guess=url_guess, + last_modified=mtime, size_bytes=size, job_id=job_id) + + +def normalize_cached_page_paths() -> int: + """Ensure all cached_pages.file_path values are absolute. 
Returns number of rows updated/normalized.""" + changed = 0 + with _ensure_session() as session: + rows = session.execute(text( + "SELECT file_path, url_guess, last_modified, size_bytes, job_id FROM cached_pages")).fetchall() + for (fp, url_guess, last_modified, size_bytes, job_id) in rows: + if not os.path.isabs(fp): + abs_fp = os.path.abspath(fp) + # Upsert under absolute path, then remove the relative entry + upsert_cached_page( + file_path=abs_fp, + url_guess=url_guess, + last_modified=last_modified, + size_bytes=size_bytes, + job_id=job_id, + ) + with _ensure_session() as session: + session.execute( + text("DELETE FROM cached_pages WHERE file_path = :fp"), {"fp": fp}) + session.commit() + changed += 1 + return changed + + +def db_get_keywords() -> List[str]: + """Return a list of all unique keywords from job listings.""" + with _ensure_session() as session: + rows = session.execute( + text("SELECT DISTINCT keyword FROM job_listings")).fetchall() + return [r[0] for r in rows] + + +def db_get_regions() -> List[str]: + """Return a list of all unique regions from job listings.""" + with _ensure_session() as session: + rows = session.execute( + text("SELECT DISTINCT region FROM job_listings")).fetchall() + return [r[0] for r in rows] + + +def get_all_jobs(): + query = """ +SELECT l.job_id +,l.title +,d.description +,l.region +,l.keyword +,d.company +,l.location +,l.timestamp +,d.posted_time +,l.url +,c.file_path +,c.last_modified +,c.url_guess +,CASE WHEN c.url_guess != l.url THEN 1 ELSE 0 END AS url_guess_stale +FROM job_listings AS l +INNER JOIN job_descriptions AS d +ON l.job_id = d.job_id +AND l.url = d.url +LEFT JOIN cached_pages AS c ON l.job_id = c.job_id +ORDER BY d.posted_time DESC + """ + with _ensure_session() as session: + rows = session.execute(text(query)).fetchall() + jobs = [] + for row in rows: + job = { + "id": row[0], + "title": row[1], + "description": row[2].replace('\n', '
').strip(), + "region": row[3], + "keyword": row[4], + "company": row[5], + "location": row[6], + "timestamp": row[7], + "posted_time": row[8], + "url": row[9], + "file_path": row[10], + "last_modified": row[11], + "url_guess": row[12], + "url_guess_stale": row[13], + } + jobs.append(job) + return jobs + + +def db_get_all_job_urls() -> List[str]: + """Return list of job URLs from job_listings.""" + with _ensure_session() as session: + rows = session.execute(text("SELECT url FROM job_listings")).fetchall() + return [r[0] for r in rows] + + +def db_delete_job(job_id: str | int): + """Delete a job row (cascades to details and interactions).""" + jid = str(job_id) + with _ensure_session() as session: + obj = session.get(JobListing, jid) + if obj: + session.delete(obj) + session.commit() + + +def remove_job(url): + """Remove a job from the database.""" + try: + jid = url_to_job_id(url) + db_delete_job(jid) + cache_fp = get_cache_path(url) + remove_cached_page(os.path.abspath(cache_fp)) + if os.path.exists(cache_fp): + os.remove(cache_fp) + except Exception: + pass + + +# ---------------- New ORM convenience helpers ------------------------------ + +def get_or_create_user(username: str) -> int: + """Return user_id for username, creating if missing.""" + created_at = datetime.now(UTC).isoformat() + with _ensure_session() as session: + row = session.execute( + text("SELECT user_id FROM users WHERE username = :u"), { + "u": username} + ).fetchone() + if row: + return int(row[0]) + session.execute( + text("INSERT INTO users(username, created_at) VALUES(:u, :c)"), + {"u": username, "c": created_at}, + ) + session.commit() + # open a new session to fetch the id + with _ensure_session() as session: + row2 = session.execute( + text("SELECT user_id FROM users WHERE username = :u"), { + "u": username} + ).fetchone() + if row2: + return int(row2[0]) + # Edge case retry + return get_or_create_user(username) + + +def mark_favorite(job_id: str | int, username: str, favorite: bool = True): + user_id = get_or_create_user(username) + upsert_user_interaction(job_id, user_id=user_id, is_user_favorite=favorite) + + +def record_visit(job_id: str | int, username: str, url: Optional[str] = None): + user_id = get_or_create_user(username) + ts = now_iso() + upsert_user_interaction(job_id, user_id=user_id, + seen_at=ts, url_visited=url) + + +# ---------------- User auth/admin helpers ---------------------------------- +def create_or_update_user(username: str, password: Optional[str] = None, *, is_admin: Optional[bool] = None, is_active: Optional[bool] = None) -> int: + """Create user if missing; update password/admin/active if provided. 
Returns user_id.""" + username = (username or "").strip() + if not username: + raise ValueError("username required") + uid = get_or_create_user(username) + with _ensure_session() as session: + # Build dynamic update + fields = [] + params: Dict[str, Any] = {"u": uid} + if password is not None: + fields.append("password_hash = :ph") + params["ph"] = generate_password_hash(password) + if is_admin is not None: + fields.append("is_admin = :ia") + params["ia"] = 1 if is_admin else 0 + if is_active is not None: + fields.append("is_active = :ac") + params["ac"] = 1 if is_active else 0 + if fields: + q = f"UPDATE users SET {', '.join(fields)} WHERE user_id = :u" + session.execute(text(q), params) + session.commit() + return uid + + +def set_user_password(username: str, password: str) -> None: + create_or_update_user(username, password=password) + + +def set_user_admin(username: str, is_admin: bool) -> None: + create_or_update_user(username, is_admin=is_admin) + + +def set_user_active(username: str, is_active: bool) -> None: + create_or_update_user(username, is_active=is_active) + + +def verify_user_credentials(username: str, password: str) -> bool: + """Validate username/password against stored password_hash.""" + with _ensure_session() as session: + row = session.execute(text("SELECT password_hash, is_active FROM users WHERE username = :u"), { + "u": username}).fetchone() + if not row: + return False + ph, active = row[0], bool(row[1]) + if not active or not ph: + return False + ok = check_password_hash(ph, password) + if ok: + # record last_login + try: + session.execute(text("UPDATE users SET last_login = :ts WHERE username = :u"), { + "ts": datetime.now(UTC), "u": username}) + session.commit() + except Exception: + pass + return ok + + +def get_users() -> List[Dict[str, Any]]: + with _ensure_session() as session: + rows = session.execute(text( + "SELECT user_id, username, created_at, is_admin, is_active, last_login, (password_hash IS NOT NULL) AS has_pw FROM users ORDER BY username ASC")).fetchall() + out: List[Dict[str, Any]] = [] + for r in rows: + out.append({ + "user_id": int(r[0]), + "username": r[1], + "created_at": r[2].isoformat() if isinstance(r[2], datetime) else (r[2] or None), + "is_admin": bool(r[3]), + "is_active": bool(r[4]), + "last_login": r[5].isoformat() if r[5] else None, + "has_password": bool(r[6]), + }) + return out + + +def get_user(username: str) -> Optional[Dict[str, Any]]: + """Return single user dict or None.""" + with _ensure_session() as session: + row = session.execute(text( + "SELECT user_id, username, is_admin, is_active, password_hash, last_login, created_at FROM users WHERE username = :u" + ), {"u": username}).fetchone() + if not row: + return None + return { + "user_id": int(row[0]), + "username": row[1], + "is_admin": bool(row[2]), + "is_active": bool(row[3]), + "password_hash": row[4], + "last_login": row[5].isoformat() if row[5] else None, + "created_at": row[6].isoformat() if isinstance(row[6], datetime) else (row[6] or None), + } + + +# ---------------- Regions/Keywords helpers --------------------------------- +def upsert_region(name: str) -> int: + """Get or create a region by name; return region_id.""" + name = (name or "").strip() + if not name: + raise ValueError("Region name cannot be empty") + with _ensure_session() as session: + row = session.execute(text("SELECT region_id FROM regions WHERE name = :n"), { + "n": name}).fetchone() + if row: + return int(row[0]) + session.execute( + text("INSERT INTO regions(name) VALUES (:n)"), {"n": name}) + 
session.commit() + with _ensure_session() as session: + row2 = session.execute(text("SELECT region_id FROM regions WHERE name = :n"), { + "n": name}).fetchone() + if row2: + return int(row2[0]) + # unlikely retry + return upsert_region(name) + + +def upsert_keyword(name: str) -> int: + """Get or create a keyword by name; return keyword_id.""" + name = (name or "").strip() + if not name: + raise ValueError("Keyword name cannot be empty") + with _ensure_session() as session: + row = session.execute(text("SELECT keyword_id FROM keywords WHERE name = :n"), { + "n": name}).fetchone() + if row: + return int(row[0]) + session.execute( + text("INSERT INTO keywords(name) VALUES (:n)"), {"n": name}) + session.commit() + with _ensure_session() as session: + row2 = session.execute(text("SELECT keyword_id FROM keywords WHERE name = :n"), { + "n": name}).fetchone() + if row2: + return int(row2[0]) + return upsert_keyword(name) + + +def set_user_regions(username: str, region_names: List[str]) -> None: + """Replace user's preferred regions with given names.""" + user_id = get_or_create_user(username) + # Normalize and get ids + names = sorted({(n or "").strip() + for n in region_names if (n or "").strip()}) + region_ids: List[int] = [upsert_region(n) for n in names] + if not region_ids and not names: + # Clear all if explicitly empty list + with _ensure_session() as session: + session.execute( + text("DELETE FROM user_regions WHERE user_id = :u"), {"u": user_id}) + session.commit() + return + desired = set(region_ids) + with _ensure_session() as session: + rows = session.execute(text("SELECT region_id FROM user_regions WHERE user_id = :u"), { + "u": user_id}).fetchall() + current = set(int(r[0]) for r in rows) + to_add = desired - current + to_remove = current - desired + for rid in to_remove: + session.execute(text("DELETE FROM user_regions WHERE user_id = :u AND region_id = :r"), { + "u": user_id, "r": int(rid)}) + for rid in to_add: + session.execute(text("INSERT INTO user_regions(user_id, region_id) VALUES(:u, :r)"), { + "u": user_id, "r": int(rid)}) + session.commit() + + +def set_user_keywords(username: str, keyword_names: List[str]) -> None: + """Replace user's preferred keywords with given names.""" + user_id = get_or_create_user(username) + names = sorted({(n or "").strip() + for n in keyword_names if (n or "").strip()}) + keyword_ids: List[int] = [upsert_keyword(n) for n in names] + if not keyword_ids and not names: + with _ensure_session() as session: + session.execute( + text("DELETE FROM user_keywords WHERE user_id = :u"), {"u": user_id}) + session.commit() + return + desired = set(keyword_ids) + with _ensure_session() as session: + rows = session.execute(text("SELECT keyword_id FROM user_keywords WHERE user_id = :u"), { + "u": user_id}).fetchall() + current = set(int(r[0]) for r in rows) + to_add = desired - current + to_remove = current - desired + for kid in to_remove: + session.execute(text("DELETE FROM user_keywords WHERE user_id = :u AND keyword_id = :k"), { + "u": user_id, "k": int(kid)}) + for kid in to_add: + session.execute(text("INSERT INTO user_keywords(user_id, keyword_id) VALUES(:u, :k)"), { + "u": user_id, "k": int(kid)}) + session.commit() + + +def get_user_regions(username: str) -> List[Dict[str, str]]: + """Return preferred region names for a user (empty if none).""" + with _ensure_session() as session: + row = session.execute(text("SELECT user_id FROM users WHERE username = :u"), { + "u": username}).fetchone() + if not row: + return [] + user_id = int(row[0]) + rows = 
session.execute(text( + """ + SELECT r.name, r.color + FROM regions r + INNER JOIN user_regions ur ON ur.region_id = r.region_id + WHERE ur.user_id = :u + ORDER BY r.name ASC + """ + ), {"u": user_id}).fetchall() + return [{"name": r[0], "color": r[1]} for r in rows] + + +def get_user_keywords(username: str) -> List[Dict[str, str]]: + """Return preferred keyword names for a user (empty if none).""" + with _ensure_session() as session: + row = session.execute(text("SELECT user_id FROM users WHERE username = :u"), { + "u": username}).fetchone() + if not row: + return [] + user_id = int(row[0]) + rows = session.execute(text( + """ + SELECT k.name, k.color + FROM keywords k + INNER JOIN user_keywords uk ON uk.keyword_id = k.keyword_id + WHERE uk.user_id = :u + ORDER BY k.name ASC + """ + ), {"u": user_id}).fetchall() + return [{"name": r[0], "color": r[1]} for r in rows] + + +def get_all_regions() -> List[Dict[str, str]]: + """Return all region names from regions table (sorted).""" + with _ensure_session() as session: + rows = session.execute( + text("SELECT name, color FROM regions ORDER BY name ASC")).fetchall() + return [{"name": r[0], "color": r[1]} for r in rows] + + +def get_all_keywords() -> List[Dict[str, str]]: + """Return all keyword names from keywords table (sorted).""" + with _ensure_session() as session: + rows = session.execute( + text("SELECT name, color FROM keywords ORDER BY name ASC")).fetchall() + return [{"name": r[0], "color": r[1]} for r in rows] + + +def seed_regions_keywords_from_listings() -> Dict[str, int]: + """Seed regions/keywords tables from distinct values in job_listings if empty. + + Returns dict with counts inserted: {"regions": n1, "keywords": n2}. + """ + inserted = {"regions": 0, "keywords": 0} + with _ensure_session() as session: + # Regions + existing_regions = session.execute( + text("SELECT COUNT(*) FROM regions")).scalar_one() + if int(existing_regions or 0) == 0: + rows = session.execute(text( + "SELECT DISTINCT region FROM job_listings WHERE region IS NOT NULL AND region != ''")).fetchall() + for r in rows: + name = r[0] + if name: + try: + session.execute( + text("INSERT IGNORE INTO regions(name, color) VALUES(:n, :c)"), {"n": name, "c": get_color_from_string(name)}) + inserted["regions"] += 1 + except Exception: + pass + session.commit() + # Keywords + existing_keywords = session.execute( + text("SELECT COUNT(*) FROM keywords")).scalar_one() + if int(existing_keywords or 0) == 0: + rows = session.execute(text( + "SELECT DISTINCT keyword FROM job_listings WHERE keyword IS NOT NULL AND keyword != ''")).fetchall() + for r in rows: + name = r[0] + if name: + try: + session.execute( + text("INSERT IGNORE INTO keywords(name, color) VALUES(:n, :c)"), {"n": name, "c": get_color_from_string(name)}) + inserted["keywords"] += 1 + except Exception: + pass + session.commit() + return inserted + + +def list_regions_full() -> List[Dict[str, Any]]: + with _ensure_session() as session: + rows = session.execute( + text("SELECT region_id, name, color FROM regions ORDER BY name ASC")).fetchall() + return [{"region_id": int(r[0]), "name": r[1], "color": r[2]} for r in rows] + + +def list_keywords_full() -> List[Dict[str, Any]]: + with _ensure_session() as session: + rows = session.execute( + text("SELECT keyword_id, name, color FROM keywords ORDER BY name ASC")).fetchall() + return [{"keyword_id": int(r[0]), "name": r[1], "color": r[2]} for r in rows] + + +def rename_region(region_id: int, new_name: str) -> bool: + new_name = (new_name or "").strip() + if not 
new_name: + raise ValueError("new_name required") + with _ensure_session() as session: + try: + session.execute(text("UPDATE regions SET name = :n WHERE region_id = :id"), { + "n": new_name, "id": int(region_id)}) + session.commit() + return True + except Exception: + session.rollback() + return False + + +def rename_keyword(keyword_id: int, new_name: str) -> bool: + new_name = (new_name or "").strip() + if not new_name: + raise ValueError("new_name required") + with _ensure_session() as session: + try: + session.execute(text("UPDATE keywords SET name = :n WHERE keyword_id = :id"), { + "n": new_name, "id": int(keyword_id)}) + session.commit() + return True + except Exception: + session.rollback() + return False + + +def change_region_color(region_id: int, new_color: str) -> bool: + new_color = (new_color or "").strip() + if not new_color: + raise ValueError("new_color required") + with _ensure_session() as session: + try: + session.execute(text("UPDATE regions SET color = :c WHERE region_id = :id"), { + "c": new_color, "id": int(region_id)}) + session.commit() + return True + except Exception: + session.rollback() + return False + + +def change_keyword_color(keyword_id: int, new_color: str) -> bool: + new_color = (new_color or "").strip() + if not new_color: + raise ValueError("new_color required") + with _ensure_session() as session: + try: + session.execute(text("UPDATE keywords SET color = :c WHERE keyword_id = :id"), { + "c": new_color, "id": int(keyword_id)}) + session.commit() + return True + except Exception: + session.rollback() + return False diff --git a/web/scraper.py b/web/scraper.py new file mode 100644 index 0000000..f4d89b4 --- /dev/null +++ b/web/scraper.py @@ -0,0 +1,121 @@ +from datetime import datetime, UTC +from bs4 import BeautifulSoup +from typing import List, Dict, Set +from web.utils import get_base_url, cache_page, safe_get_text, safe_get_attr, is_cached, get_cached_content, make_request_with_retry + + +def scrape_listings_page(listing, region: str, keyword: str, seen_urls: Set[str]) -> List: + """Parse a single job listing.""" + try: + title_elem = listing.find("div", class_="title") + url_elem = listing.find("a") + pay_elem = listing.find("div", class_="attr remuneration") + if pay_elem: + pay_elem = pay_elem.find("span", class_="valu") + location_elem = listing.find("div", class_="location") + + if not title_elem or not url_elem: + return [] + + title = title_elem.get_text(strip=True) + url = url_elem["href"] + pay = pay_elem.get_text(strip=True) if pay_elem else "N/A" + location = location_elem.get_text( + strip=True) if location_elem else "N/A" + + status = "DUPLICATE" if url in seen_urls else "NEW" + if url in seen_urls: + return [] + + # job_summary variable retained for parity but not used + job_summary = f"{status} [{region}/{keyword}] | Title: {title[:50]}{'...' 
if len(title) > 50 else ''} | Location: {location} | URL: {url}" + _ = job_summary + + return [datetime.now(UTC).isoformat(), region, keyword, title, pay, location, url] + except (AttributeError, KeyError): + return [] + + +def scrape_job_page(content: str, url: str) -> Dict: + """Scrape job details from a job listing page.""" + soup = BeautifulSoup(content, "html.parser") + + # Extract each field + title = safe_get_text(soup.find("h1", class_="postingtitle")) + company = safe_get_text(soup.find("h2", class_="company-name")) + + map_elem = soup.find("div", id="map") + if map_elem: + lat = safe_get_attr(map_elem, "data-latitude") + lon = safe_get_attr(map_elem, "data-longitude") + accuracy = safe_get_attr(map_elem, "data-accuracy") + location = f"Lat: {lat}, Lon: {lon}, Accuracy: {accuracy}" + else: + location = "N/A" + + mapaddress = soup.find("div", class_="mapaddress") + if mapaddress: + location = safe_get_text(mapaddress) + " " + location + + description_elem = soup.find("section", id="postingbody") + if description_elem: + de = BeautifulSoup(str(description_elem), "html.parser") + qr_code_elem = de.find(class_="print-qrcode-label") + # Remove QR code if it exists + if qr_code_elem: + qr_code_elem.decompose() + description = de.text.strip() + else: + description = '' + + posting_info = soup.find("div", class_="postinginfos") + if posting_info: + pi = BeautifulSoup(str(posting_info), "html.parser") + postinginfo_tags = pi.find_all("p", class_="postinginfo") + job_id = safe_get_text(postinginfo_tags[0]) if postinginfo_tags else "" + posted_time_elem = pi.find("time", class_="date timeago") + posted_time = safe_get_attr( + posted_time_elem, "datetime") if posted_time_elem else "" + else: + job_id = "" + posted_time = "" + + return { + "url": url, + "title": title, + "company": company, + "location": location, + "description": description, + "id": job_id, + "posted_time": posted_time + } + + +def scrape_job_data(content: str, region: str, keyword: str, seen_urls: Set[str]) -> List[List]: + """Parse HTML content to extract job listings.""" + soup = BeautifulSoup(content, "html.parser") + listings = soup.find_all("li", class_="cl-static-search-result") + new_rows = [] + + for i, listing in enumerate(listings): + job_data = scrape_listings_page(listing, region, keyword, seen_urls) + if job_data: + new_rows.append(job_data) + + return new_rows + + +def process_region_keyword(region: str, keyword: str, seen_urls: Set[str]) -> List[List]: + """Process a single region and keyword.""" + url = get_base_url().format(region=region, keyword=keyword.replace(" ", "+")) + if is_cached(url): + content = get_cached_content(url) + cache_status = "CACHED" + else: + content = make_request_with_retry(url, 3) + if content is None: + return [] + cache_page(url, content) + cache_status = "FETCHED" + _ = cache_status # no-op to silence unused var + return scrape_job_data(content, region, keyword, seen_urls) diff --git a/web/static/index.js b/web/static/index.js new file mode 100644 index 0000000..2c1d207 --- /dev/null +++ b/web/static/index.js @@ -0,0 +1,102 @@ +// Update the table with job data +function updateTableData(jobs) { + const jobsContainer = document.getElementById("jobs"); + jobsContainer.innerHTML = ""; // Clear existing jobs + jobs.forEach((job) => { + const jobElement = document.createElement("div"); + jobElement.classList.add("job"); + jobElement.innerHTML = ` +

${job.title}

+

${job.posted_time}

+ ${job.region} + ${job.keyword} + `; + jobsContainer.appendChild(jobElement); + }); +} + +// Fetch job data from the server +function fetchJobs() { + fetch("/jobs") + .then((response) => response.json()) + .then((data) => { + updateTableData(data); + }) + .catch((error) => console.error("Error fetching jobs:", error)); +} + +// scrape form submission +function updateScrapeInfo(message, color) { + let scrapingInfo = document.getElementById("scrape-info"); + scrapingInfo.style.display = "inline-block"; // Show the scraping info + scrapingInfo.innerText = message; + scrapingInfo.style.color = color; +} + +function scrape(event) { + event.preventDefault(); // Prevent the default form submission + updateScrapeInfo("Scraping in progress...", "blue"); + fetch("/scrape") + .then((response) => response.json()) + .then((data) => { + if (data.status) { + updateScrapeInfo(data.status, "green"); + } else { + updateScrapeInfo("Scraping failed. Please try again.", "red"); + } + }) + .catch((error) => console.error("Error:", error)); +} + +function updateJobsFiltered() { + const selectedRegion = document.getElementById("region").value; + const selectedKeyword = document.getElementById("keyword").value; + const filterForm = document.getElementById("filter-form"); + const queryString = new URLSearchParams({ + region: selectedRegion, + keyword: selectedKeyword, + }).toString(); + filterForm.action = `/?${queryString}`; + filterForm.submit(); // Submit the form to apply filters +} + +function regionClick(event) { + const region = event.target.innerText; + const regionInput = document.getElementById("region"); + regionInput.value = region; + updateJobsFiltered(); +} + +function keywordClick(event) { + const keyword = event.target.innerText; + const keywordInput = document.getElementById("keyword"); + keywordInput.value = keyword; + updateJobsFiltered(); +} + +document.querySelectorAll(".job-keyword").forEach((element) => { + element.addEventListener("click", keywordClick); +}); +document.querySelectorAll(".job-region").forEach((element) => { + element.addEventListener("click", regionClick); +}); + +document.getElementById("scrape-form").addEventListener("submit", scrape); +document + .getElementById("region") + .addEventListener("change", updateJobsFiltered); +document + .getElementById("keyword") + .addEventListener("change", updateJobsFiltered); +document + .getElementById("filter-form") + .addEventListener("submit", updateJobsFiltered); +document.getElementById("reset-filters").addEventListener("click", () => { + document.getElementById("region").value = ""; + document.getElementById("keyword").value = ""; + updateJobsFiltered(); +}); diff --git a/web/static/settings.js b/web/static/settings.js new file mode 100644 index 0000000..28c87b7 --- /dev/null +++ b/web/static/settings.js @@ -0,0 +1,61 @@ +/* javascript form handling */ +document + .getElementById("user-settings-form") + .addEventListener("submit", function (event) { + event.preventDefault(); // Prevent default form submission + + const form = event.target; + const formData = new FormData(form); + + // Collect selected regions and keywords + const selectedRegions = []; + const selectedKeywords = []; + formData.forEach((value, key) => { + if (key === "region") { + selectedRegions.push(value); + } else if (key === "keyword") { + selectedKeywords.push(value); + } + }); + + // Add new region if provided + const newRegion = formData.get("new-region").trim(); + if (newRegion) { + selectedRegions.push(newRegion); + } + + // Add new keyword if provided + 
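+    // Note: formData.get() returns null when an input is missing, so the
+    // "new-region" / "new-keyword" fields are assumed to always be present in the form.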
const newKeyword = formData.get("new-keyword").trim(); + if (newKeyword) { + selectedKeywords.push(newKeyword); + } + + // Prepare data to send + const dataToSend = { + regions: selectedRegions, + keywords: selectedKeywords, + csrf_token: formData.get("csrf_token"), + }; + + // Send data via Fetch API + fetch(form.action, { + method: "POST", + headers: { + "Content-Type": "application/json", + "X-CSRF-Token": document.querySelector('meta[name="csrf-token"]') + .content, + }, + body: JSON.stringify(dataToSend), + }) + .then((response) => { + if (response.ok) { + window.location.reload(); // Reload to reflect changes + } else { + alert("Error saving preferences."); + } + }) + .catch((error) => { + console.error("Error:", error); + alert("Error saving preferences."); + }); + }); diff --git a/web/static/styles.css b/web/static/styles.css new file mode 100644 index 0000000..c27e32a --- /dev/null +++ b/web/static/styles.css @@ -0,0 +1,144 @@ +body { + font-family: Arial, sans-serif; + margin: 10px; + font-size: 16px; +} +h1 { + color: #333; + font-size: 1.2em; +} +a { + text-decoration: none; +} +a:hover { + text-decoration: underline; +} +footer { + margin-top: 20px; + text-align: center; + font-size: 0.9em; + color: #666; +} +nav { + margin-bottom: 10px; +} + +#filters { + display: block; + margin-bottom: 1rem; +} +#filters #filter-form { + display: inline-block; + max-width: 500px; +} +#filters #scrape-form { + display: inline-block; + margin-left: 1rem; +} +#filters #scrape-form span#scrape-info { + display: none; + color: blue; + font-size: 0.9em; +} +#jobs { + margin: 0; + padding: 0; + display: grid; + grid-template-columns: repeat(auto-fill, minmax(360px, 1fr)); + gap: 1rem; +} +.job { + border: 1px solid #ccc; + padding: 1rem; + border-radius: 5px; + background-color: #f9f9f9; +} +.job a { + display: inline-block; +} +.job h3 { + margin: 0 0 0.25rem 0; + font-size: 1.1em; +} + +.job-posted-time { + font-weight: normal; + font-size: 0.8em; + color: #666; + margin: 0.25rem 0; +} + +.job-region, +.job-keyword { + border: 1px solid #ccc; + border-radius: 0.8rem; + padding: 0.2rem 0.4rem; + display: inline; + margin-right: 0.5rem; + background-color: rgb(255, 255, 255); +} + +#job-details { + max-width: 100%; + margin: auto; +} + +.job-description { + margin-top: 5px; + color: #333; + margin: 0; + padding: 0; + line-height: 1.25; + font-size: 14px; +} +.job-description br { + margin: -5px 0; +} + +.job-title { + font-weight: bold; + color: #333; + text-decoration: underline; + font-size: 16px; +} + +/* Taxonomy Management */ +#regions-table, +#keywords-table { + margin-top: 20px; +} +#regions-table table, +#keywords-table table { + max-width: 100%; + border-collapse: collapse; +} +#regions-table th, +#regions-table td, +#keywords-table th, +#keywords-table td { + border: 1px solid #ccc; + padding: 8px; + text-align: left; +} +#regions-table th, +#keywords-table th { + background-color: #f9f9f9; +} + +/* Admin User Management */ +#users { + margin-top: 20px; +} +#users table { + max-width: 100%; + border-collapse: collapse; +} +#users th, +#users td { + border: 1px solid #ccc; + padding: 8px; + text-align: left; +} +#users th { + background-color: #f9f9f9; +} diff --git a/web/static/taxonomy.js b/web/static/taxonomy.js new file mode 100644 index 0000000..51b6b16 --- /dev/null +++ b/web/static/taxonomy.js @@ -0,0 +1,41 @@ +function updateColor(id, type, newColor) { + fetch("/admin/taxonomy", { + method: "POST", + headers: { + "Content-Type": "application/json", + "X-CSRF-Token": 
document.querySelector('meta[name="csrf-token"]').content, + }, + body: JSON.stringify({ + action: + type === "region" ? "change_region_color" : "change_keyword_color", + [type + "_id"]: id, + [type + "_color"]: newColor, + }), + }).then((response) => { + if (response.ok) { + location.reload(); + } else { + alert("Failed to update " + type + " color"); + } + }); +} + +document + .getElementById("region-color-form") + .addEventListener("submit", function (event) { + event.preventDefault(); + const regionId = this.querySelector('input[name="region_id"]').value; + const newColor = this.querySelector('input[name="new_region_color"]').value; + updateColor(regionId, "region", newColor); + }); + +document + .getElementById("keyword-color-form") + .addEventListener("submit", function (event) { + event.preventDefault(); + const keywordId = this.querySelector('input[name="keyword_id"]').value; + const newColor = this.querySelector( + 'input[name="new_keyword_color"]' + ).value; + updateColor(keywordId, "keyword", newColor); + }); diff --git a/web/templates/admin/login.html b/web/templates/admin/login.html new file mode 100644 index 0000000..2f88366 --- /dev/null +++ b/web/templates/admin/login.html @@ -0,0 +1,9 @@ +{% extends 'base.html' %} {% block content %} +

Login

+
+ + + + +
+{% endblock %} diff --git a/web/templates/admin/taxonomy.html b/web/templates/admin/taxonomy.html new file mode 100644 index 0000000..c38b82b --- /dev/null +++ b/web/templates/admin/taxonomy.html @@ -0,0 +1,142 @@ +{% extends 'base.html' %} {% block content %} +

Taxonomy

+
+

Regions

+
+ + + + + + +
+
+ + + + + + + + + + + {% for r in regions %} + + + + + + + + {% endfor %} + +
<th>ID</th> <th>Name</th> <th>Rename</th> <th>Color</th>
{{ r.region_id }}{{ r.name }} +
+ + + + + +
+
+
+ + + + + +
+
+
+
+
+

Keywords

+
+ + + + + + +
+
+ + + + + + + + + + + {% for k in keywords %} + + + + + + + {% endfor %} + +
<th>ID</th> <th>Name</th> <th>Rename</th> <th>Color</th>
{{ k.keyword_id }}{{ k.name }} +
+ + + + + +
+
+
+ + + + + +
+
+
+
+{% endblock %} {% block footer_scripts %} + + +{% endblock %} diff --git a/web/templates/admin/users.html b/web/templates/admin/users.html new file mode 100644 index 0000000..6696cdd --- /dev/null +++ b/web/templates/admin/users.html @@ -0,0 +1,139 @@ +{% extends 'base.html' %} {% block content %} +
+

Users

+
+ + + + + + + + + + + + + + + + {% for u in users %} + + + + + + + + + + + + {% endfor %} + +
<th>ID</th> <th>Username</th> <th>Admin</th> <th>Active</th> <th>Password</th> <th>Created</th> <th>Last Login</th>
+ {{ u.user_id }} + + + + + + + {{ '✅' if u.has_password else '❌' }}{{ u.created_at }}{{ u.last_login or 'never' }} + +
+
+
+

Create / Update User

+
+ + + + + + +
+{% endblock %} {% block footer_scripts %} + +{% endblock %} diff --git a/web/templates/base.html b/web/templates/base.html new file mode 100644 index 0000000..23d2fb5 --- /dev/null +++ b/web/templates/base.html @@ -0,0 +1,43 @@ + + + + + + {{ title }} + + + {% block styles %}{% endblock %} {% block scripts %}{% endblock %} + + + {% block header %} +
+

{{ title or 'Admin' }}

+ + {% with messages = get_flashed_messages() %} {% if messages %} +
    + {% for m in messages %} +
  • {{ m }}
  • + {% endfor %} +
+ {% endif %} {% endwith %} +
+ {% endblock %} {% block content %}{% endblock %} +
+

© 2025 Job Listings

+
+ {% block footer_scripts %}{% endblock %} + + diff --git a/web/templates/index.html b/web/templates/index.html new file mode 100644 index 0000000..feac3ec --- /dev/null +++ b/web/templates/index.html @@ -0,0 +1,55 @@ +{% extends "base.html" %} {% block styles %} + +{% endblock %} +{% block title %}Job Listings{% endblock %} +{% block content %} +
+
+ + + + + + +
+
+ + +
+
+
+ {% for job in jobs %} +
+

{{ job['title'] }}

+

{{ job['posted_time'] }}

+ {{ job['region'] }} + {{ job['keyword'] }} +
+ {% endfor %} +
+{% endblock %} +{% block footer_scripts %} + +{% endblock %} \ No newline at end of file diff --git a/web/templates/job.html b/web/templates/job.html new file mode 100644 index 0000000..85948df --- /dev/null +++ b/web/templates/job.html @@ -0,0 +1,27 @@ +{% extends "base.html" %} {% block title %}Job Details{% endblock %} {% block +styles %}{% endblock %} {% block content %} +
+

ID: {{ job.id }}

+

+ Title: {{ job.title }} | Company: {{ + job.company }} | Location: {{ job.location }} +

+

+ Salary: {{ job.salary }} | Posted on: {{ + job.posted_date }} +

+ +

Job Description

+
+

{{ job.description|safe }}

+
+

+ Original URL: +

+

+ {{ job.title }} +

+
+{% endblock %} diff --git a/web/templates/user/settings.html b/web/templates/user/settings.html new file mode 100644 index 0000000..01f1758 --- /dev/null +++ b/web/templates/user/settings.html @@ -0,0 +1,84 @@ +{% extends 'base.html' %} {% block title %}Your Preferences{% endblock %} {% +block content %} +

Your Preferences

+
+ +
+ Regions +

+ Add new Region: + +

+ {% if all_regions %} {% for r in all_regions %} + + {% endfor %} {% else %} +

No regions available. Ask an admin to add some.

+ {% endif %} +
+
+ Keywords +

+ Add new Keyword: + +

+ {% if all_keywords %} {% for k in all_keywords %} + + {% endfor %} {% else %} +

No keywords available. Ask an admin to add some.

+ {% endif %} +
+ +
+{% endblock %} {% block footer_scripts %} + +{% endblock %} diff --git a/web/utils.py b/web/utils.py new file mode 100644 index 0000000..1371d64 --- /dev/null +++ b/web/utils.py @@ -0,0 +1,336 @@ +""" +Utility functions for the Craigslist scraper. +""" + +from typing import Any, Optional as _Optional +from datetime import datetime, UTC +import json +import os +import random +import re +import requests +import time +from typing import Optional, List, Dict + + +def get_config_file() -> str: + """Return the path to the main config file.""" + return os.path.abspath(os.path.join( + os.path.dirname(__file__), '..', 'config', 'settings.json')) + + +def get_config() -> dict: + """Return the loaded configuration dict.""" + CONFIG = {} + try: + with open(get_config_file(), 'r', encoding='utf-8') as _f: + CONFIG = json.load(_f) + except Exception: + CONFIG = {} + return CONFIG + + +def get_users_from_settings() -> List[Dict]: + """Return user entries from settings.json (array of dicts).""" + users = get_config().get('users', []) + if not isinstance(users, list): + return [] + out: List[Dict] = [] + for u in users: + if not isinstance(u, dict): + continue + username = (u.get('username') or '').strip() + if not username: + continue + out.append({ + 'username': username, + 'is_admin': bool(u.get('is_admin', False)), + 'password': u.get('password') or '' + }) + return out + + +def initialize_users_from_settings() -> int: + """Ensure users from settings.json exist in DB; set admin/active and passwords. + + Returns number of users processed. + """ + from web.db import create_or_update_user # local import to avoid cycles + users = get_users_from_settings() + count = 0 + for u in users: + pw = u.get('password') or None + create_or_update_user(u['username'], password=pw, is_admin=bool( + u.get('is_admin', False)), is_active=True) + count += 1 + return count + + +def verify_credentials(username: str, password: str) -> bool: + """Proxy to db.verify_user_credentials""" + from web.db import verify_user_credentials + return verify_user_credentials(username, password) + + +# --- Database configuration helpers --- + +def get_mysql_config() -> dict: + """Return MySQL/MariaDB connection settings.""" + db = get_config().get('database', {}).get('mysql', {}) + return { + 'host': db.get('host', '127.0.0.1'), + 'user': db.get('user', 'root'), + 'password': db.get('password', ''), + 'database': db.get('database', 'jobs'), + 'port': db.get('port', 3306), + } + + +def get_http_setting(key: str, default=None): + return get_config().get('http', {}).get(key, default) + + +def get_paths() -> dict: + return get_config().get('paths', {}) + + +def get_cache_dir() -> str: + return get_paths().get('cache_dir', 'cache') + + +def get_logs_dir() -> str: + return get_paths().get('logs_dir', 'logs') + + +def get_user_agent() -> str: + return get_http_setting('user_agent') + + +def get_request_timeout() -> int: + return get_http_setting('request_timeout') + + +def get_max_retries() -> int: + return get_http_setting('max_retries') + + +def get_backoff_factor() -> int: + return get_http_setting('backoff_factor') + + +def get_min_delay() -> int: + return get_http_setting('min_delay') + + +def get_max_delay() -> int: + return get_http_setting('max_delay') + + +def get_base_url() -> str: + return get_config().get('scraper', {}).get('base_url', "https://{region}.craigslist.org/search/jjj?query={keyword}&sort=rel") + + +def ensure_cache_dir(): + """Ensure cache directory exists.""" + os.makedirs(get_cache_dir(), exist_ok=True) + + +def now_iso() 
-> str: + """Get the current time in ISO format.""" + return datetime.now(UTC).isoformat() + + +def get_filename_from_url(url: str) -> str: + """Convert URL to a safe filename.""" + return url.replace("https://", "").replace("/", "_").replace("?", "_").replace("&", "_") + + +def url_to_job_id(url: str) -> int: + """Extract the job id from a Craigslist URL (last path segment without .html).""" + last = url.rstrip("/").split("/")[-1].replace(".html", "") + if last.isdigit(): + return int(last) + return 0 + + +def normalize_job_id(raw_id: Optional[str], url: Optional[str]) -> Optional[int]: + """Normalize job id coming from details page (e.g., 'post id: 1234567890'). + Fallback to URL-derived id when needed. + """ + if raw_id: + m = re.search(r"(\d{5,})", raw_id) + if m: + return int(m.group(1)) + if url: + return url_to_job_id(url) + return None + + +def get_url_from_filename(name: str) -> str: + """Generate a URL guess based on the name.""" + # Best-effort URL guess from filename convention (underscores to slashes) + base = os.path.splitext(name)[0] + url_guess = f"https://{base.replace('_', '/')}" + return url_guess + + +def get_cached_content(url: str) -> str: + """Get cached content for URL.""" + with open(get_cache_path(url), "r", encoding="utf-8") as f: + return f.read() + + +def safe_get_text(element, default="N/A"): + """Safely extract text from BeautifulSoup element.""" + return element.get_text(strip=True) if element else default + + +def safe_get_attr(element, attr, default="N/A"): + """Safely extract attribute from BeautifulSoup element.""" + return element.get(attr, default) if element else default + + +def get_random_delay(min_delay: int = get_min_delay(), max_delay: int = get_max_delay()) -> float: + """Get a random delay between min_delay and max_delay seconds.""" + return random.uniform(min_delay, max_delay) + + +def get_cache_path(url: str) -> str: + """Get cache file path for URL.""" + return os.path.join(get_cache_dir(), f"{get_filename_from_url(url)}.html") + + +def cache_page(url: str, content: str): + """Cache the page content with a timestamp.""" + cache_path = get_cache_path(url) + os.makedirs(os.path.dirname(cache_path), exist_ok=True) + with open(cache_path, "w", encoding="utf-8") as f: + f.write(content) + # Update the file's modification time to the current time + os.utime(cache_path, None) + + +def is_cached(url: str) -> bool: + """Check if the page is cached and not older than 24 hours.""" + cache_path = get_cache_path(url) + if not os.path.isfile(cache_path): + return False + + # Check the file's age if it's a search result page + if 'search' in url: + file_age = time.time() - os.path.getmtime(cache_path) + if file_age > 24 * 3600: # 24 hours in seconds + return False + + return True + + +def is_cache_stale(last_modified: str, days: int = 1) -> bool: + """Check if the cached page is stale (older than 24 hours).""" + if not last_modified: + return True + last_datetime = datetime.fromisoformat(last_modified) + file_age = time.time() - last_datetime.timestamp() + return file_age > days * 24 * 3600 # days in seconds + + +def delete_cached_page(url: str): + cache_fp = get_cache_path(url) + if os.path.exists(cache_fp): + try: + os.remove(cache_fp) + except Exception: + pass + + +def get_color_from_string(s: str) -> str: + """Generate a color code from a string.""" + hash_code = hash(s) + # Ensure the hash code is positive + hash_code = hash_code if hash_code >= 0 else -hash_code + # Extract RGB components + r, g, b = (hash_code & 0xFF0000) >> 16, (hash_code & + 
0x00FF00) >> 8, hash_code & 0x0000FF + # ensure RGB components are within 128-255 + r = max(128, min(255, r)) + g = max(128, min(255, g)) + b = max(128, min(255, b)) + # Combine RGB components back into a single integer + calculated = (r << 16) | (g << 8) | b + return f"#{calculated:06X}" + + +# ---- App helpers moved from app.py for reuse and readability ------------- + + +def filter_jobs( + jobs: List[Dict[str, Any]], + region: _Optional[str] = None, + keyword: _Optional[str] = None, +) -> List[Dict[str, Any]]: + """Filter jobs by optional region and keyword.""" + filtered = jobs + if region: + filtered = [j for j in filtered if j.get("region") == region] + if keyword: + filtered = [j for j in filtered if j.get("keyword") == keyword] + return filtered + + +def get_job_by_id(job_id): + """Fetch job details by job ID from the database.""" + try: + from web.db import get_all_jobs # lazy import to avoid cycles + for j in get_all_jobs(): + if str(j.get("id")) == str(job_id) or str(j.get("job_id")) == str(job_id): + return j + except Exception: + pass + return {} + + +def make_request_with_retry(url: str, max_retries: int = get_max_retries()) -> Optional[str]: + """Make HTTP request with retry logic and proper error handling.""" + # initial delay + delay = get_random_delay() + + headers = {'User-Agent': get_user_agent()} + + for attempt in range(max_retries): + try: + delay = get_random_delay() * (get_backoff_factor() ** attempt) + if attempt > 0: + time.sleep(delay) + + resp = requests.get(url, headers=headers, + timeout=get_request_timeout()) + + if resp.status_code == 403: + return None + elif resp.status_code == 429: + time.sleep(delay * 3) # Longer delay for rate limiting + continue + elif resp.status_code == 404: + return None + elif resp.status_code == 410: + return None + elif resp.status_code >= 400: + if attempt == max_retries - 1: + return None + continue + + resp.raise_for_status() + return resp.text + + except requests.exceptions.Timeout: + pass + except requests.exceptions.ConnectionError: + pass + except requests.exceptions.RequestException: + pass + + if attempt < max_retries - 1: + delay = get_random_delay() * (get_backoff_factor() ** attempt) + time.sleep(delay) + + return None
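+
+
+# Minimal illustrative sketch (not wired into the app): fetch and cache one search
+# page using the helpers above. The region/keyword values here are examples only;
+# real runs take them from config/regions.txt and config/keywords.txt.
+if __name__ == "__main__":
+    example_url = get_base_url().format(region="losangeles", keyword="handyman")
+    if is_cached(example_url):
+        print("already cached:", get_cache_path(example_url))
+    else:
+        html = make_request_with_retry(example_url)
+        if html is not None:
+            ensure_cache_dir()
+            cache_page(example_url, html)
+            print("fetched and cached:", get_cache_path(example_url))
+        else:
+            print("request failed:", example_url)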