feat: implement background scheduler for job scraping with Gunicorn support
This commit is contained in:
22
web/app.py
22
web/app.py
@@ -4,7 +4,7 @@ from flask_wtf import CSRFProtect
|
||||
from typing import Dict, List
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from web.craigslist import scraper
|
||||
from web.craigslist import scraper, start_scheduler_in_background
|
||||
from web.db import (
|
||||
db_init,
|
||||
delete_user_by_id,
|
||||
@@ -60,6 +60,26 @@ app.static_folder = "static"
|
||||
csrf = CSRFProtect(app)
|
||||
|
||||
|
||||
def _scheduler_enabled() -> bool:
    """Decide whether this process should launch the background scheduler.

    The decision is based purely on environment variables:

    * ``SCRAPE_SCHEDULER_ENABLED`` must be one of ``1``, ``true``, ``yes`` or
      ``on`` (case-insensitive, surrounding whitespace ignored).
    * ``SERVER_SOFTWARE`` must not contain ``gunicorn``.
    * If ``FLASK_RUN_FROM_CLI`` is ``"true"``, then ``WERKZEUG_RUN_MAIN`` must
      be ``"true"`` as well.

    Returns:
        ``True`` only when all three conditions hold, ``False`` otherwise.
    """
    env = os.environ

    # Explicit opt-in: an unset, empty or unrecognised value means "off".
    opted_in = (env.get("SCRAPE_SCHEDULER_ENABLED") or "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }

    # Never start from inside a Gunicorn process; per the original author a
    # Gunicorn master hook starts the scheduler instead (hook not visible here).
    under_gunicorn = "gunicorn" in (env.get("SERVER_SOFTWARE") or "").lower()

    # Intended to avoid a second scheduler under the Flask reloader.
    # NOTE(review): this is also true for `flask run` with the reloader off,
    # assuming WERKZEUG_RUN_MAIN is only set in the reloader's child process --
    # confirm that disabling the scheduler in that case is intended.
    cli_without_run_main = (
        env.get("FLASK_RUN_FROM_CLI") == "true"
        and env.get("WERKZEUG_RUN_MAIN") != "true"
    )

    return opted_in and not under_gunicorn and not cli_without_run_main
|
||||
|
||||
|
||||
import threading  # imported here because only this block uses it

# One-shot state for the scheduler start-up hook below.
_scheduler_start_lock = threading.Lock()
_scheduler_start_done = False


@app.before_request
def _start_scheduler_if_enabled():
    """Start the background scheduler once, just before the first request.

    ``Flask.before_first_request`` was deprecated in Flask 2.2 and removed in
    Flask 2.3, so the hook is registered with ``before_request`` and guarded
    by a module-level flag instead. After the first successful pass the hook
    is a cheap no-op for every later request.

    If the start-up call raises, the flag is left unset so the next request
    tries again.

    Returns:
        Always ``None``; a ``before_request`` handler must not return a value
        unless it wants to short-circuit the request.
    """
    global _scheduler_start_done

    # Fast path: skip the lock entirely once start-up has been handled.
    if _scheduler_start_done:
        return None

    with _scheduler_start_lock:
        # Re-check under the lock: a concurrent first request may have
        # finished start-up while this one was waiting.
        if _scheduler_start_done:
            return None
        if _scheduler_enabled():
            start_scheduler_in_background()
        # Only mark as done after the call above returned without raising.
        _scheduler_start_done = True
    return None
|
||||
|
||||
|
||||
def require_admin():
|
||||
username = session.get('username')
|
||||
if not username:
|
||||
|
||||
Reference in New Issue
Block a user