initial project commit
This commit is contained in:
82
analytics.py
Normal file
82
analytics.py
Normal file
@@ -0,0 +1,82 @@
|
||||
import pandas as pd
|
||||
from sqlalchemy import create_engine, text
|
||||
from web.utils import get_mysql_config
|
||||
|
||||
|
||||
def get_engine():
    """Create a SQLAlchemy engine for the configured MySQL database.

    Connection settings are read from ``get_mysql_config()``, which must
    provide the ``user``, ``password``, ``host``, ``port`` and ``database``
    keys.

    Returns:
        A new engine using the ``pymysql`` driver and the ``utf8mb4``
        charset, created with ``future=True``. A fresh engine is built on
        every call.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote

    cfg = get_mysql_config()

    # Percent-encode the credentials: a raw "@", ":", "/" or "%" in the user
    # name or password would otherwise be parsed as URL structure and yield a
    # wrong host or password. ``safe=""`` makes sure "/" is escaped as well.
    # NOTE(review): assumes the config holds raw (not pre-encoded) credentials.
    user = quote(str(cfg['user']), safe="")
    password = quote(str(cfg['password']), safe="")

    url = (
        f"mysql+pymysql://{user}:{password}"
        f"@{cfg['host']}:{cfg['port']}/{cfg['database']}?charset=utf8mb4"
    )
    return create_engine(url, future=True)
|
||||
|
||||
|
||||
def get_all_jobs():
    """Fetch all job listings joined with their descriptions and cached pages.

    Each listing in ``job_listings`` is inner-joined to ``job_descriptions``
    on both ``job_id`` and ``url``, and left-joined to ``cached_pages`` on
    ``job_id``, so the cache columns are NULL/None when no cached page exists.
    Rows are ordered by ``posted_time``, newest first.

    Returns:
        A list of dicts, one per result row, with the keys ``job_id``,
        ``title``, ``description``, ``region``, ``keyword``, ``company``,
        ``location``, ``timestamp``, ``posted_time``, ``url``, ``file_path``,
        ``last_modified``, ``url_guess`` and ``url_guess_stale``.
        ``url_guess_stale`` is 1 when the cached ``url_guess`` differs from
        the listing ``url`` and 0 otherwise (including when ``url_guess`` is
        NULL, because a comparison with NULL is never true).
    """
    query = """
        SELECT l.job_id
              ,l.title
              ,d.description
              ,l.region
              ,l.keyword
              ,d.company
              ,l.location
              ,l.timestamp
              ,d.posted_time
              ,l.url
              ,c.file_path
              ,c.last_modified
              ,c.url_guess
              ,CASE WHEN c.url_guess != l.url THEN 1 ELSE 0 END AS url_guess_stale
        FROM job_listings AS l
        INNER JOIN job_descriptions AS d
            ON l.job_id = d.job_id
            AND l.url = d.url
        LEFT JOIN cached_pages AS c ON l.job_id = c.job_id
        ORDER BY d.posted_time DESC
    """
    engine = get_engine()
    try:
        with engine.begin() as conn:
            result = conn.execute(text(query))
            # mappings() keys each row by its SELECT column name, so the dict
            # keys follow the query instead of hand-maintained positions.
            # The rows are materialized here, before the connection closes.
            return [dict(row) for row in result.mappings()]
    finally:
        # get_engine() builds a new engine (and connection pool) per call;
        # dispose of it so pooled connections do not pile up across calls.
        engine.dispose()
|
||||
|
||||
|
||||
def main():
    """Load all job postings and print a short summary to stdout.

    Prints the first rows of the postings table, the total number of
    postings and, when there is at least one posting, the number of postings
    per region and per keyword.
    """
    jobs_df = pd.DataFrame(get_all_jobs())

    print(jobs_df.head())
    print(f"Total postings: {len(jobs_df)}")

    if jobs_df.empty:
        # A DataFrame built from an empty list has no columns, so the
        # per-column breakdowns below would raise KeyError.
        return

    print("Regions:")
    print(jobs_df['region'].value_counts())

    print("Keywords:")
    print(jobs_df['keyword'].value_counts())

    # Disabled: print a random sample of postings with their descriptions.
    # NOTE: sample(5) raises ValueError when fewer than 5 rows exist; use
    # sample(min(5, len(jobs_df))) if this is re-enabled.
    # print("Sample Job Postings:")
    # print("-" * 40)
    # for sample in jobs_df[['region', 'keyword', 'title', 'location', 'description']].sample(5).itertuples():
    #     print(
    #         f"Region: {sample.region}, Keyword: {sample.keyword}, Title: {sample.title}, Location: {sample.location}")
    #     print(sample.description)
    #     print("-" * 40)
|
||||
|
||||
|
||||
# Run the summary report only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user