fix: ensure scheduler starts only once during Flask requests
This commit is contained in:
@@ -153,3 +153,23 @@ class TestScheduler:
|
|||||||
assert search_data.get(("losangeles", "python")) == 4
|
assert search_data.get(("losangeles", "python")) == 4
|
||||||
assert search_data.get(("losangeles", "java")) == 1
|
assert search_data.get(("losangeles", "java")) == 1
|
||||||
assert result.get("discovered") == 10 # Total unique jobs
|
assert result.get("discovered") == 10 # Total unique jobs
|
||||||
|
|
||||||
|
|
||||||
|
def test_app_scheduler_starts_once(monkeypatch):
    """Ensure the Flask before_request hook starts scheduler only once."""
    # Imported locally so the test does not depend on module-level imports.
    from unittest.mock import patch

    import web.app as app_module

    # Turn the scheduler on and clear the server/CLI related variables.
    # NOTE(review): presumably _scheduler_enabled() inspects these three
    # variables; its full body is not visible here -- confirm.
    monkeypatch.setenv("SCRAPE_SCHEDULER_ENABLED", "true")
    monkeypatch.delenv("SERVER_SOFTWARE", raising=False)
    monkeypatch.delenv("FLASK_RUN_FROM_CLI", raising=False)
    monkeypatch.delenv("WERKZEUG_RUN_MAIN", raising=False)

    # Reset the module-level "already started" flag through monkeypatch so
    # the original value is restored on teardown instead of leaking into
    # other tests.
    monkeypatch.setattr(app_module, "_scheduler_started", False)

    # Same for the shared app config: set the keys via monkeypatch so they
    # are reverted after the test.
    monkeypatch.setitem(app_module.app.config, "TESTING", True)
    monkeypatch.setitem(app_module.app.config, "WTF_CSRF_ENABLED", False)

    with patch("web.app.start_scheduler_in_background") as mock_start:
        with app_module.app.test_client() as client:
            # Two requests trigger the before_request hook twice.
            client.get("/health")
            client.get("/health")

    # Only the first request may have started the scheduler.
    assert mock_start.call_count == 1
|
||||||
|
|||||||
23
web/app.py
23
web/app.py
@@ -1,4 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
|
import threading
|
||||||
from flask import Flask, request, jsonify, render_template, redirect, url_for, session, flash, Response
|
from flask import Flask, request, jsonify, render_template, redirect, url_for, session, flash, Response
|
||||||
from flask_wtf import CSRFProtect
|
from flask_wtf import CSRFProtect
|
||||||
from typing import Dict, List
|
from typing import Dict, List
|
||||||
@@ -60,6 +61,10 @@ app.static_folder = "static"
|
|||||||
csrf = CSRFProtect(app)
|
csrf = CSRFProtect(app)
|
||||||
|
|
||||||
|
|
||||||
|
_scheduler_started = False
|
||||||
|
_scheduler_lock = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
def _scheduler_enabled() -> bool:
|
def _scheduler_enabled() -> bool:
|
||||||
flag = (os.environ.get("SCRAPE_SCHEDULER_ENABLED") or "").strip().lower()
|
flag = (os.environ.get("SCRAPE_SCHEDULER_ENABLED") or "").strip().lower()
|
||||||
if flag not in {"1", "true", "yes", "on"}:
|
if flag not in {"1", "true", "yes", "on"}:
|
||||||
@@ -74,10 +79,22 @@ def _scheduler_enabled() -> bool:
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def _maybe_start_scheduler() -> None:
    """Start the background scheduler unless it has already been started.

    Does nothing when the module-level ``_scheduler_started`` flag is set
    or when ``_scheduler_enabled()`` returns False. Otherwise it takes
    ``_scheduler_lock``, re-checks the flag, calls
    ``start_scheduler_in_background()`` and sets the flag.
    """
    global _scheduler_started

    # Cheap unlocked check: after the first successful start every request
    # returns here without touching the lock.
    if _scheduler_started:
        return
    if not _scheduler_enabled():
        return

    with _scheduler_lock:
        # Re-check under the lock: another request may have started the
        # scheduler while this one was waiting to acquire it.
        if _scheduler_started:
            return
        start_scheduler_in_background()
        # Set only after the call returns, so if it raises the flag stays
        # False and a later request will try again.
        _scheduler_started = True
|
||||||
|
|
||||||
|
|
||||||
|
@app.before_request
def _start_scheduler_if_enabled():
    """Request hook that delegates to ``_maybe_start_scheduler()``.

    Registered with ``app.before_request``, so it runs on every request.
    Returns None.
    """
    _maybe_start_scheduler()
|
||||||
|
|
||||||
|
|
||||||
def require_admin():
|
def require_admin():
|
||||||
|
|||||||
Reference in New Issue
Block a user