From 3fcf3574e6419359fee0d7a0f8caf8b69fbaa2c6 Mon Sep 17 00:00:00 2001 From: "georg.sinn-schirwitz" Date: Fri, 29 Aug 2025 23:01:46 +0200 Subject: [PATCH] remove analytics --- analytics.py | 82 ------------------------------------------------ requirements.txt | 1 - 2 files changed, 83 deletions(-) delete mode 100644 analytics.py diff --git a/analytics.py b/analytics.py deleted file mode 100644 index d17c1ee..0000000 --- a/analytics.py +++ /dev/null @@ -1,82 +0,0 @@ -import pandas as pd -from sqlalchemy import create_engine, text -from web.utils import get_mysql_config - - -def get_engine(): - cfg = get_mysql_config() - url = f"mysql+pymysql://{cfg['user']}:{cfg['password']}@{cfg['host']}:{cfg['port']}/{cfg['database']}?charset=utf8mb4" - return create_engine(url, future=True) - - -def get_all_jobs(): - query = """ -SELECT l.job_id -,l.title -,d.description -,l.region -,l.keyword -,d.company -,l.location -,l.timestamp -,d.posted_time -,l.url -,c.file_path -,c.last_modified -,c.url_guess -,CASE WHEN c.url_guess != l.url THEN 1 ELSE 0 END AS url_guess_stale -FROM job_listings AS l -INNER JOIN job_descriptions AS d -ON l.job_id = d.job_id -AND l.url = d.url -LEFT JOIN cached_pages AS c ON l.job_id = c.job_id -ORDER BY d.posted_time DESC - """ - engine = get_engine() - with engine.begin() as conn: - rows = conn.execute(text(query)).fetchall() - return [ - { - "job_id": row[0], - "title": row[1], - "description": row[2], - "region": row[3], - "keyword": row[4], - "company": row[5], - "location": row[6], - "timestamp": row[7], - "posted_time": row[8], - "url": row[9], - "file_path": row[10], - "last_modified": row[11], - "url_guess": row[12], - "url_guess_stale": row[13], - } - for row in rows - ] - - -def main(): - """Main function to load and display job postings.""" - jobs_df = pd.DataFrame(get_all_jobs()) - - print(jobs_df.head()) - print(f"Total postings: {len(jobs_df)}") - - print("Regions:") - print(jobs_df['region'].value_counts()) - - print("Keywords:") - print(jobs_df['keyword'].value_counts()) - - # print("Sample Job Postings:") - # print("-" * 40) - # for sample in jobs_df[['region', 'keyword', 'title', 'location', 'description']].sample(5).itertuples(): - # print( - # f"Region: {sample.region}, Keyword: {sample.keyword}, Title: {sample.title}, Location: {sample.location}") - # print(sample.description) - # print("-" * 40) - - -if __name__ == "__main__": - main() diff --git a/requirements.txt b/requirements.txt index 0dc108d..91e7e36 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,6 @@ beautifulsoup4 flask flask-wtf -pandas pytest requests sqlalchemy