fix: ensure scheduler starts only once during Flask requests
Some checks failed
CI/CD Pipeline / test (push) Successful in 1m34s
CI/CD Pipeline / build-image (push) Failing after 1m47s

This commit is contained in:
2026-01-22 16:55:18 +01:00
parent d84b8f128b
commit 446c432c18
2 changed files with 40 additions and 3 deletions

View File

@@ -153,3 +153,23 @@ class TestScheduler:
assert search_data.get(("losangeles", "python")) == 4 assert search_data.get(("losangeles", "python")) == 4
assert search_data.get(("losangeles", "java")) == 1 assert search_data.get(("losangeles", "java")) == 1
assert result.get("discovered") == 10 # Total unique jobs assert result.get("discovered") == 10 # Total unique jobs
def test_app_scheduler_starts_once(monkeypatch):
    """Ensure the Flask before_request hook starts the scheduler only once.

    Two requests are issued against ``/health`` while
    ``start_scheduler_in_background`` is patched out; the patched starter
    must have been invoked exactly once.

    All global state touched here (environment variables, the module-level
    ``_scheduler_started`` flag and the Flask config keys) is changed through
    ``monkeypatch`` so it is restored at teardown and cannot leak into other
    tests that import ``web.app``.
    """
    import web.app as app_module

    # Opt in to the scheduler and drop the server/CLI markers.
    # NOTE(review): presumably _scheduler_enabled() inspects these variables;
    # confirm against its full body.
    monkeypatch.setenv("SCRAPE_SCHEDULER_ENABLED", "true")
    for env_name in ("SERVER_SOFTWARE", "FLASK_RUN_FROM_CLI", "WERKZEUG_RUN_MAIN"):
        monkeypatch.delenv(env_name, raising=False)

    # Reset the "already started" flag via monkeypatch (not plain assignment)
    # so the original value comes back after the test.
    monkeypatch.setattr(app_module, "_scheduler_started", False)

    # Same for the app config: setitem undoes the change on teardown.
    monkeypatch.setitem(app_module.app.config, "TESTING", True)
    monkeypatch.setitem(app_module.app.config, "WTF_CSRF_ENABLED", False)

    with patch("web.app.start_scheduler_in_background") as mock_start:
        with app_module.app.test_client() as client:
            client.get("/health")
            client.get("/health")

    assert mock_start.call_count == 1

View File

@@ -1,4 +1,5 @@
import os import os
import threading
from flask import Flask, request, jsonify, render_template, redirect, url_for, session, flash, Response from flask import Flask, request, jsonify, render_template, redirect, url_for, session, flash, Response
from flask_wtf import CSRFProtect from flask_wtf import CSRFProtect
from typing import Dict, List from typing import Dict, List
@@ -60,6 +61,10 @@ app.static_folder = "static"
csrf = CSRFProtect(app) csrf = CSRFProtect(app)
# Process-wide flag: set to True by _maybe_start_scheduler() once
# start_scheduler_in_background() has been called.
_scheduler_started = False
# Held by _maybe_start_scheduler() while it re-checks and flips the flag above.
_scheduler_lock = threading.Lock()
def _scheduler_enabled() -> bool: def _scheduler_enabled() -> bool:
flag = (os.environ.get("SCRAPE_SCHEDULER_ENABLED") or "").strip().lower() flag = (os.environ.get("SCRAPE_SCHEDULER_ENABLED") or "").strip().lower()
if flag not in {"1", "true", "yes", "on"}: if flag not in {"1", "true", "yes", "on"}:
@@ -74,10 +79,22 @@ def _scheduler_enabled() -> bool:
return True return True
def _maybe_start_scheduler() -> None:
    """Start the background scheduler unless it is already running or disabled.

    The ``_scheduler_started`` flag is read once without the lock as a cheap
    early exit, then read again while holding ``_scheduler_lock`` before
    ``start_scheduler_in_background()`` is called and the flag is set.
    """
    global _scheduler_started

    # Early exit without taking the lock: already started, or not enabled.
    if _scheduler_started or not _scheduler_enabled():
        return

    with _scheduler_lock:
        # Re-check under the lock; another caller may have started it
        # between the check above and acquiring the lock.
        if not _scheduler_started:
            start_scheduler_in_background()
            _scheduler_started = True
@app.before_request
def _start_scheduler_if_enabled():
    """Request hook that delegates to ``_maybe_start_scheduler()``.

    Registered with ``app.before_request``; the start-only-once guard lives
    in ``_maybe_start_scheduler()``, not here.
    """
    _maybe_start_scheduler()
def require_admin(): def require_admin():