fix: update scraper settings and modify scrape form to use POST method
This commit is contained in:
@@ -14,7 +14,7 @@
|
|||||||
"max_retries": 3,
|
"max_retries": 3,
|
||||||
"backoff_factor": 2,
|
"backoff_factor": 2,
|
||||||
"min_delay": 1,
|
"min_delay": 1,
|
||||||
"max_delay": 5
|
"max_delay": 1
|
||||||
},
|
},
|
||||||
"paths": {
|
"paths": {
|
||||||
"cache_dir": "cache",
|
"cache_dir": "cache",
|
||||||
|
|||||||
12
web/app.py
12
web/app.py
@@ -272,12 +272,12 @@ def set_favorite(job_id):
|
|||||||
return jsonify({"status": "error", "message": str(e)}), 400
|
return jsonify({"status": "error", "message": str(e)}), 400
|
||||||
|
|
||||||
|
|
||||||
# Exempt JSON favorite endpoint from CSRF (uses fetch without token). Consider
|
# Exempt JSON favorite endpoint from CSRF (uses fetch without token).
|
||||||
# adding a token header client-side and removing this exemption later.
|
# TODO: add a token header client-side and remove this exemption
|
||||||
csrf.exempt(set_favorite)
|
csrf.exempt(set_favorite)
|
||||||
|
|
||||||
|
|
||||||
@app.route('/scrape', methods=['GET'])
|
@app.route('/scrape', methods=['POST'])
|
||||||
def scrape():
|
def scrape():
|
||||||
"""Trigger the web scraping process with streaming output."""
|
"""Trigger the web scraping process with streaming output."""
|
||||||
def generate():
|
def generate():
|
||||||
@@ -290,6 +290,10 @@ def scrape():
|
|||||||
return Response(generate(), mimetype='text/plain')
|
return Response(generate(), mimetype='text/plain')
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: add a token header client-side and remove this exemption
|
||||||
|
csrf.exempt(scrape)
|
||||||
|
|
||||||
|
|
||||||
@app.route('/scrape-page', methods=['GET'])
|
@app.route('/scrape-page', methods=['GET'])
|
||||||
def scrape_page():
|
def scrape_page():
|
||||||
"""Serve the scrape page with streaming output display."""
|
"""Serve the scrape page with streaming output display."""
|
||||||
@@ -306,7 +310,7 @@ def login():
|
|||||||
if verify_user_credentials(username, password) or username:
|
if verify_user_credentials(username, password) or username:
|
||||||
session['username'] = username
|
session['username'] = username
|
||||||
flash('Logged in')
|
flash('Logged in')
|
||||||
return redirect(url_for('admin_users'))
|
return redirect(url_for('index'))
|
||||||
flash('Invalid credentials')
|
flash('Invalid credentials')
|
||||||
return render_template('admin/login.html', title='Login')
|
return render_template('admin/login.html', title='Login')
|
||||||
|
|
||||||
|
|||||||
@@ -233,21 +233,6 @@ def scraper():
|
|||||||
except StopIteration as stop:
|
except StopIteration as stop:
|
||||||
listing_summary = stop.value if isinstance(stop.value, dict) else {}
|
listing_summary = stop.value if isinstance(stop.value, dict) else {}
|
||||||
|
|
||||||
new_jobs = []
|
|
||||||
if listing_summary:
|
|
||||||
new_jobs = listing_summary.get("new_jobs", []) or []
|
|
||||||
|
|
||||||
if new_jobs:
|
|
||||||
yield f"Preparing email alert for {len(new_jobs)} new jobs...\n"
|
|
||||||
try:
|
|
||||||
sent, info = _send_new_job_alert(new_jobs)
|
|
||||||
if sent:
|
|
||||||
yield "Job alert email sent.\n"
|
|
||||||
else:
|
|
||||||
yield f"Skipping email alert: {info}\n"
|
|
||||||
except Exception as exc:
|
|
||||||
yield f"Failed to send job alert email: {exc}\n"
|
|
||||||
|
|
||||||
# Finally, fetch and refresh individual job pages for current listings
|
# Finally, fetch and refresh individual job pages for current listings
|
||||||
job_urls = db_get_all_job_urls()
|
job_urls = db_get_all_job_urls()
|
||||||
yield f"Processing {len(job_urls)} job pages...\n"
|
yield f"Processing {len(job_urls)} job pages...\n"
|
||||||
@@ -264,6 +249,21 @@ def scraper():
|
|||||||
|
|
||||||
yield "\nScraping completed successfully!\n"
|
yield "\nScraping completed successfully!\n"
|
||||||
|
|
||||||
|
new_jobs = []
|
||||||
|
if listing_summary:
|
||||||
|
new_jobs = listing_summary.get("new_jobs", []) or []
|
||||||
|
|
||||||
|
if new_jobs:
|
||||||
|
yield f"Preparing email alert for {len(new_jobs)} new jobs...\n"
|
||||||
|
try:
|
||||||
|
sent, info = _send_new_job_alert(new_jobs)
|
||||||
|
if sent:
|
||||||
|
yield "Job alert email sent.\n"
|
||||||
|
else:
|
||||||
|
yield f"Skipping email alert: {info}\n"
|
||||||
|
except Exception as exc:
|
||||||
|
yield f"Failed to send job alert email: {exc}\n"
|
||||||
|
|
||||||
|
|
||||||
def scrape_jobs_with_retry(max_retries=3):
|
def scrape_jobs_with_retry(max_retries=3):
|
||||||
"""Run the scraping process with retry logic for failures."""
|
"""Run the scraping process with retry logic for failures."""
|
||||||
|
|||||||
@@ -29,9 +29,12 @@ function fetchJobs() {
|
|||||||
.catch((error) => console.error("Error fetching jobs:", error));
|
.catch((error) => console.error("Error fetching jobs:", error));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The template renders #scrape-form only when a session username is set, so
// the lookup can return null. Guard the binding so a missing form does not
// throw and abort the listener registrations further down this script.
const scrapeForm = document.getElementById("scrape-form");
if (scrapeForm) {
  scrapeForm.addEventListener("submit", scrape);
}
|
||||||
|
|
||||||
// scrape form submission
|
// scrape form submission
|
||||||
function updateScrapeInfo(message, color) {
|
function updateScrapeInfo(message, color) {
|
||||||
let scrapingInfo = document.getElementById("scrape-info");
|
const scrapingInfo = document.getElementById("scrape-info");
|
||||||
scrapingInfo.style.display = "inline-block"; // Show the scraping info
|
scrapingInfo.style.display = "inline-block"; // Show the scraping info
|
||||||
scrapingInfo.innerText = message;
|
scrapingInfo.innerText = message;
|
||||||
scrapingInfo.style.color = color;
|
scrapingInfo.style.color = color;
|
||||||
@@ -40,7 +43,8 @@ function updateScrapeInfo(message, color) {
|
|||||||
function scrape(event) {
|
function scrape(event) {
|
||||||
event.preventDefault(); // Prevent the default form submission
|
event.preventDefault(); // Prevent the default form submission
|
||||||
updateScrapeInfo("Scraping in progress...", "blue");
|
updateScrapeInfo("Scraping in progress...", "blue");
|
||||||
fetch("/scrape")
|
// Send POST request to /scrape endpoint
|
||||||
|
fetch(scrapeForm.action, { method: "POST" })
|
||||||
// expect HTML response containing "Scraping completed successfully!"
|
// expect HTML response containing "Scraping completed successfully!"
|
||||||
.then((response) => response.text())
|
.then((response) => response.text())
|
||||||
.then((data) => {
|
.then((data) => {
|
||||||
@@ -82,6 +86,32 @@ function keywordClick(event) {
|
|||||||
updateJobsFiltered();
|
updateJobsFiltered();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Toggle the favorite state of a job when its star button is clicked.
 *
 * Sends a POST to `/jobs/<jobId>/favorite`. When the JSON reply reports
 * `status === "success"`, the star is redrawn from `is_favorite`; any other
 * reply is logged together with its `message`.
 *
 * @param {Event} event - Click event dispatched on a `.favorite-button`.
 */
function favoriteClick(event) {
  event.preventDefault();
  // currentTarget is the element the listener is bound to, even when the
  // click originates on a node nested inside the button. It must be read
  // synchronously: it is reset once event dispatch finishes.
  const button = event.currentTarget;
  const jobId = button.getAttribute("data-job-id");
  fetch(`/jobs/${jobId}/favorite`, { method: "POST" })
    .then((response) => response.json())
    .then((data) => {
      if (data.status !== "success") {
        console.error("Error updating favorite:", data.message);
        return;
      }
      // Filled star for a favorite, hollow star otherwise.
      button.innerText = data.is_favorite ? "★" : "☆";
    })
    .catch((error) => console.error("Error:", error));
}
|
||||||
|
|
||||||
|
// Attach the favorite toggle handler to every star button on the page.
for (const favoriteButton of document.querySelectorAll(".favorite-button")) {
  favoriteButton.addEventListener("click", favoriteClick);
}
|
||||||
|
|
||||||
document.querySelectorAll(".job-keyword").forEach((element) => {
|
document.querySelectorAll(".job-keyword").forEach((element) => {
|
||||||
element.addEventListener("click", keywordClick);
|
element.addEventListener("click", keywordClick);
|
||||||
});
|
});
|
||||||
@@ -89,7 +119,6 @@ document.querySelectorAll(".job-region").forEach((element) => {
|
|||||||
element.addEventListener("click", regionClick);
|
element.addEventListener("click", regionClick);
|
||||||
});
|
});
|
||||||
|
|
||||||
document.getElementById("scrape-form").addEventListener("submit", scrape);
|
|
||||||
document
|
document
|
||||||
.getElementById("region")
|
.getElementById("region")
|
||||||
.addEventListener("change", updateJobsFiltered);
|
.addEventListener("change", updateJobsFiltered);
|
||||||
|
|||||||
@@ -35,7 +35,7 @@
|
|||||||
<button type="button" id="reset-filters">Reset</button>
|
<button type="button" id="reset-filters">Reset</button>
|
||||||
</form>
|
</form>
|
||||||
{% if session.get('username') %}
|
{% if session.get('username') %}
|
||||||
<form id="scrape-form" method="GET" action="/scrape">
|
<form id="scrape-form" method="POST" action="{{ url_for('scrape') }}">
|
||||||
<button type="submit">Scrape Jobs</button>
|
<button type="submit">Scrape Jobs</button>
|
||||||
<span id="scrape-info"></span>
|
<span id="scrape-info"></span>
|
||||||
</form>
|
</form>
|
||||||
@@ -44,11 +44,13 @@
|
|||||||
<div id="jobs">
|
<div id="jobs">
|
||||||
{% for job in jobs %}
|
{% for job in jobs %}
|
||||||
<div class="job">
|
<div class="job">
|
||||||
<!--<h3><a href="{{ job['url'] }}" target="_blank">{{ job['title'] }}</a></h3>-->
|
|
||||||
<h3><a href="{{ url_for('job_by_id', job_id=job['id']) }}" target="_blank">{{ job['title'] }}</a></h3>
|
<h3><a href="{{ url_for('job_by_id', job_id=job['id']) }}" target="_blank">{{ job['title'] }}</a></h3>
|
||||||
<p class="job-posted-time">{{ job['posted_time'] }}</p>
|
<p class="job-posted-time">{{ job['posted_time'] }}</p>
|
||||||
<span class="job-region region-{{ job['region'] }}">{{ job['region'] }}</span>
|
<span class="job-region region-{{ job['region'] }}">{{ job['region'] }}</span>
|
||||||
<span class="job-keyword keyword-{{ job['keyword']|replace(' ', '')|lower }}">{{ job['keyword'] }}</span>
|
<span class="job-keyword keyword-{{ job['keyword']|replace(' ', '')|lower }}">{{ job['keyword'] }}</span>
|
||||||
|
<button class="favorite-button" data-job-id="{{ job['id'] }}" data-username="{{ session.get('username', '') }}">
|
||||||
|
{% if job['is_favorite'] %}★{% else %}☆{% endif %}
|
||||||
|
</button>
|
||||||
</div>
|
</div>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
Reference in New Issue
Block a user