fix: update scraper settings and modify scrape form to use POST method
This commit is contained in:
@@ -14,7 +14,7 @@
|
||||
"max_retries": 3,
|
||||
"backoff_factor": 2,
|
||||
"min_delay": 1,
|
||||
"max_delay": 5
|
||||
"max_delay": 1
|
||||
},
|
||||
"paths": {
|
||||
"cache_dir": "cache",
|
||||
|
||||
12
web/app.py
12
web/app.py
@@ -272,12 +272,12 @@ def set_favorite(job_id):
|
||||
return jsonify({"status": "error", "message": str(e)}), 400
|
||||
|
||||
|
||||
# Exempt JSON favorite endpoint from CSRF (uses fetch without token). Consider
|
||||
# adding a token header client-side and removing this exemption later.
|
||||
# Exempt JSON favorite endpoint from CSRF (uses fetch without token).
|
||||
# TODO: add a token header client-side and remove this exemption
|
||||
csrf.exempt(set_favorite)
|
||||
|
||||
|
||||
@app.route('/scrape', methods=['GET'])
|
||||
@app.route('/scrape', methods=['POST'])
|
||||
def scrape():
|
||||
"""Trigger the web scraping process with streaming output."""
|
||||
def generate():
|
||||
@@ -290,6 +290,10 @@ def scrape():
|
||||
return Response(generate(), mimetype='text/plain')
|
||||
|
||||
|
||||
# TODO: add a token header client-side and remove this exemption
|
||||
csrf.exempt(scrape)
|
||||
|
||||
|
||||
@app.route('/scrape-page', methods=['GET'])
|
||||
def scrape_page():
|
||||
"""Serve the scrape page with streaming output display."""
|
||||
@@ -306,7 +310,7 @@ def login():
|
||||
if verify_user_credentials(username, password) or username:
|
||||
session['username'] = username
|
||||
flash('Logged in')
|
||||
return redirect(url_for('admin_users'))
|
||||
return redirect(url_for('index'))
|
||||
flash('Invalid credentials')
|
||||
return render_template('admin/login.html', title='Login')
|
||||
|
||||
|
||||
@@ -233,21 +233,6 @@ def scraper():
|
||||
except StopIteration as stop:
|
||||
listing_summary = stop.value if isinstance(stop.value, dict) else {}
|
||||
|
||||
new_jobs = []
|
||||
if listing_summary:
|
||||
new_jobs = listing_summary.get("new_jobs", []) or []
|
||||
|
||||
if new_jobs:
|
||||
yield f"Preparing email alert for {len(new_jobs)} new jobs...\n"
|
||||
try:
|
||||
sent, info = _send_new_job_alert(new_jobs)
|
||||
if sent:
|
||||
yield "Job alert email sent.\n"
|
||||
else:
|
||||
yield f"Skipping email alert: {info}\n"
|
||||
except Exception as exc:
|
||||
yield f"Failed to send job alert email: {exc}\n"
|
||||
|
||||
# Finally, fetch and refresh individual job pages for current listings
|
||||
job_urls = db_get_all_job_urls()
|
||||
yield f"Processing {len(job_urls)} job pages...\n"
|
||||
@@ -264,6 +249,21 @@ def scraper():
|
||||
|
||||
yield "\nScraping completed successfully!\n"
|
||||
|
||||
new_jobs = []
|
||||
if listing_summary:
|
||||
new_jobs = listing_summary.get("new_jobs", []) or []
|
||||
|
||||
if new_jobs:
|
||||
yield f"Preparing email alert for {len(new_jobs)} new jobs...\n"
|
||||
try:
|
||||
sent, info = _send_new_job_alert(new_jobs)
|
||||
if sent:
|
||||
yield "Job alert email sent.\n"
|
||||
else:
|
||||
yield f"Skipping email alert: {info}\n"
|
||||
except Exception as exc:
|
||||
yield f"Failed to send job alert email: {exc}\n"
|
||||
|
||||
|
||||
def scrape_jobs_with_retry(max_retries=3):
|
||||
"""Run the scraping process with retry logic for failures."""
|
||||
|
||||
@@ -29,9 +29,12 @@ function fetchJobs() {
|
||||
.catch((error) => console.error("Error fetching jobs:", error));
|
||||
}
|
||||
|
||||
const scrapeForm = document.getElementById("scrape-form");
|
||||
scrapeForm.addEventListener("submit", scrape);
|
||||
|
||||
// scrape form submission
|
||||
function updateScrapeInfo(message, color) {
|
||||
let scrapingInfo = document.getElementById("scrape-info");
|
||||
const scrapingInfo = document.getElementById("scrape-info");
|
||||
scrapingInfo.style.display = "inline-block"; // Show the scraping info
|
||||
scrapingInfo.innerText = message;
|
||||
scrapingInfo.style.color = color;
|
||||
@@ -40,7 +43,8 @@ function updateScrapeInfo(message, color) {
|
||||
function scrape(event) {
|
||||
event.preventDefault(); // Prevent the default form submission
|
||||
updateScrapeInfo("Scraping in progress...", "blue");
|
||||
fetch("/scrape")
|
||||
// Send POST request to /scrape endpoint
|
||||
fetch(scrapeForm.action, { method: "POST" })
|
||||
// expect a plain-text response containing "Scraping completed successfully!"
|
||||
.then((response) => response.text())
|
||||
.then((data) => {
|
||||
@@ -82,6 +86,32 @@ function keywordClick(event) {
|
||||
updateJobsFiltered();
|
||||
}
|
||||
|
||||
/**
 * Toggle the favorite state of a job when its star button is clicked.
 *
 * Sends a POST to `/jobs/<jobId>/favorite` and, when the JSON reply has
 * `status === "success"`, sets the button label to "★" or "☆" according to
 * `is_favorite`. Any other status is logged to the console and the button
 * is left unchanged.
 *
 * @param {Event} event - Click event dispatched on a `.favorite-button`.
 */
function favoriteClick(event) {
  event.preventDefault();
  // currentTarget is the element the listener is attached to, so this keeps
  // working even if the button later contains child nodes (icons, spans).
  // It must be captured synchronously: it is null once dispatch has finished.
  const button = event.currentTarget;
  const jobId = button.getAttribute("data-job-id");
  fetch(`/jobs/${jobId}/favorite`, { method: "POST" })
    .then((response) => response.json())
    .then((data) => {
      if (data.status !== "success") {
        console.error("Error updating favorite:", data.message);
        return;
      }
      // Reflect the server-side favorite state in the button label
      button.innerText = data.is_favorite ? "★" : "☆";
    })
    .catch((error) => console.error("Error:", error));
}
|
||||
|
||||
document.querySelectorAll(".favorite-button").forEach((element) => {
|
||||
element.addEventListener("click", favoriteClick);
|
||||
});
|
||||
|
||||
document.querySelectorAll(".job-keyword").forEach((element) => {
|
||||
element.addEventListener("click", keywordClick);
|
||||
});
|
||||
@@ -89,7 +119,6 @@ document.querySelectorAll(".job-region").forEach((element) => {
|
||||
element.addEventListener("click", regionClick);
|
||||
});
|
||||
|
||||
document.getElementById("scrape-form").addEventListener("submit", scrape);
|
||||
document
|
||||
.getElementById("region")
|
||||
.addEventListener("change", updateJobsFiltered);
|
||||
|
||||
@@ -35,7 +35,7 @@
|
||||
<button type="button" id="reset-filters">Reset</button>
|
||||
</form>
|
||||
{% if session.get('username') %}
|
||||
<form id="scrape-form" method="GET" action="/scrape">
|
||||
<form id="scrape-form" method="POST" action="{{ url_for('scrape') }}">
|
||||
<button type="submit">Scrape Jobs</button>
|
||||
<span id="scrape-info"></span>
|
||||
</form>
|
||||
@@ -44,11 +44,13 @@
|
||||
<div id="jobs">
|
||||
{% for job in jobs %}
|
||||
<div class="job">
|
||||
<!--<h3><a href="{{ job['url'] }}" target="_blank">{{ job['title'] }}</a></h3>-->
|
||||
<h3><a href="{{ url_for('job_by_id', job_id=job['id']) }}" target="_blank">{{ job['title'] }}</a></h3>
|
||||
<p class="job-posted-time">{{ job['posted_time'] }}</p>
|
||||
<span class="job-region region-{{ job['region'] }}">{{ job['region'] }}</span>
|
||||
<span class="job-keyword keyword-{{ job['keyword']|replace(' ', '')|lower }}">{{ job['keyword'] }}</span>
|
||||
<button class="favorite-button" data-job-id="{{ job['id'] }}" data-username="{{ session.get('username', '') }}">
|
||||
{% if job['is_favorite'] %}★{% else %}☆{% endif %}
|
||||
</button>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user