initial project commit

This commit is contained in:
georg.sinn-schirwitz
2025-08-29 15:07:58 +02:00
parent 38708e6d1d
commit 23a67d7fe1
31 changed files with 3433 additions and 0 deletions

82
analytics.py Normal file
View File

@@ -0,0 +1,82 @@
import pandas as pd
from sqlalchemy import create_engine, text
from web.utils import get_mysql_config
def get_engine():
    """Create a SQLAlchemy engine for the configured MySQL database.

    Connection settings come from ``get_mysql_config()``; the keys ``user``,
    ``password``, ``host``, ``port`` and ``database`` are read from it. The
    URL targets the ``mysql+pymysql`` dialect/driver with ``charset=utf8mb4``.

    Returns:
        The engine produced by ``create_engine(url, future=True)``. A new
        engine is built on every call.

    Raises:
        KeyError: If one of the expected keys is missing from the config.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote

    cfg = get_mysql_config()

    # Credentials must be percent-encoded: characters such as "@", ":", "/"
    # or "#" in a raw password would otherwise be read as URL delimiters.
    # ``safe=""`` also encodes "/" and renders spaces as %20 rather than "+".
    # NOTE(review): assumes the configured credentials are stored in plain
    # (not already percent-encoded) form - confirm against the config source.
    user = quote(str(cfg['user']), safe="")
    password = quote(str(cfg['password']), safe="")

    url = (
        f"mysql+pymysql://{user}:{password}"
        f"@{cfg['host']}:{cfg['port']}/{cfg['database']}?charset=utf8mb4"
    )
    return create_engine(url, future=True)
def get_all_jobs():
    """Return every job listing with its description and cached-page data.

    The query inner-joins ``job_listings`` with ``job_descriptions`` on both
    ``job_id`` and ``url``, left-joins ``cached_pages`` on ``job_id``, and
    orders the result by ``posted_time`` descending. The computed column
    ``url_guess_stale`` is 1 when the cached ``url_guess`` differs from the
    listing ``url`` and 0 otherwise.

    Returns:
        A list with one dict per result row. Keys follow the SELECT order:
        ``job_id``, ``title``, ``description``, ``region``, ``keyword``,
        ``company``, ``location``, ``timestamp``, ``posted_time``, ``url``,
        ``file_path``, ``last_modified``, ``url_guess``, ``url_guess_stale``.
    """
    # Dict keys in the same order as the columns of the SELECT list below.
    column_names = (
        "job_id",
        "title",
        "description",
        "region",
        "keyword",
        "company",
        "location",
        "timestamp",
        "posted_time",
        "url",
        "file_path",
        "last_modified",
        "url_guess",
        "url_guess_stale",
    )
    sql = """
SELECT l.job_id
,l.title
,d.description
,l.region
,l.keyword
,d.company
,l.location
,l.timestamp
,d.posted_time
,l.url
,c.file_path
,c.last_modified
,c.url_guess
,CASE WHEN c.url_guess != l.url THEN 1 ELSE 0 END AS url_guess_stale
FROM job_listings AS l
INNER JOIN job_descriptions AS d
ON l.job_id = d.job_id
AND l.url = d.url
LEFT JOIN cached_pages AS c ON l.job_id = c.job_id
ORDER BY d.posted_time DESC
"""
    db_engine = get_engine()
    with db_engine.begin() as connection:
        records = connection.execute(text(sql)).fetchall()

    # Pair each positional value with its column name.
    return [dict(zip(column_names, record)) for record in records]
def main():
    """Load all job postings and print a short summary to stdout.

    Prints the first rows of the data, the total number of postings, and the
    number of postings per region and per keyword. When no postings are
    returned, only the (empty) preview and the total are printed.
    """
    jobs_df = pd.DataFrame(get_all_jobs())
    print(jobs_df.head())
    print(f"Total postings: {len(jobs_df)}")

    # An empty result produces a DataFrame without any columns, so the
    # 'region' / 'keyword' lookups below would raise KeyError.
    if jobs_df.empty:
        return

    print("Regions:")
    print(jobs_df['region'].value_counts())
    print("Keywords:")
    print(jobs_df['keyword'].value_counts())
# Print the summary only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()