# thc-webhook/test_print_timezones.py

import pytz
from datetime import datetime
from csv import DictReader


def get_tz_info(tz_name: str, timezones: list[dict]) -> dict | None:
    """Get timezone info by name."""
    return next((tz for tz in timezones if tz["zone_name"] == tz_name), None)


def get_country_info(country_code: str, countries: list[dict]) -> dict | None:
    """Get country info by country code."""
    return next((c for c in countries if c["country_code"] == country_code), None)


def where_is_it_420(timezones: list[dict], countries: list[dict]) -> list[str]:
    """Get the countries whose local hour is currently 4 or 16.

    Only the hour is inspected, never the minute, so the result means
    "it is 4:20 there" only when the caller runs this at 20 minutes past
    the hour.

    Args:
        timezones: Records with ``"zone_name"`` and ``"country_code"`` keys.
            When a zone name appears more than once, the first record wins.
        countries: Records with ``"country_code"`` and ``"country_name"``
            keys. When a code appears more than once, the first record wins.

    Returns:
        list[str]: Unique country names (not zone names), in the order they
        are first encountered while walking ``pytz.all_timezones``. Names
        have surrounding whitespace and double quotes stripped.
    """
    # Index both inputs once instead of rescanning the lists for every pytz
    # zone. setdefault keeps the first record per key, matching a linear
    # first-match search.
    zone_index: dict = {}
    for row in timezones:
        zone_index.setdefault(row["zone_name"], row)
    country_index: dict = {}
    for row in countries:
        country_index.setdefault(row["country_code"], row)

    # Take a single instant and convert it per zone, so every zone is judged
    # against the same moment even if the loop straddles an hour boundary.
    utc_now = datetime.now(pytz.utc)

    country_names: list[str] = []
    seen: set[str] = set()
    for zone in pytz.all_timezones:
        if utc_now.astimezone(pytz.timezone(zone)).hour not in (4, 16):
            continue
        tz_info = zone_index.get(zone)
        if tz_info is None:
            continue
        country = country_index.get(tz_info["country_code"])
        if country is None:
            continue
        country_name = country["country_name"].strip().strip('"')
        if country_name not in seen:
            seen.add(country_name)
            country_names.append(country_name)
    return country_names


def load_tz_file(
    timezone_file: str = "./tzdb/TimeZoneDB.csv/time_zone.csv",
) -> list[dict]:
    """Load distinct zone names and their country codes from a timezone CSV.

    The file is read with a fixed list of column names, so every line is
    treated as data. Only the first row seen for each ``zone_name`` is kept.

    Args:
        timezone_file: Path of the CSV file to read. Defaults to the path
            this script has always used.

    Returns:
        list[dict]: One ``{"zone_name": ..., "country_code": ...}`` dict per
        distinct zone name, in file order. ``country_code`` is ``None`` for
        a row that has fewer than two fields.

    Raises:
        OSError: If the file cannot be opened.
    """
    # column names in the csv
    timezone_names = ["zone_name", "country_code",
                      "abbreviation", "time_start", "gmt_offset", "dst"]
    # columns to keep
    load_columns = ["zone_name", "country_code"]

    seen = set()
    unique_rows = []
    # Explicit encoding so decoding does not depend on the platform locale.
    with open(timezone_file, newline='', encoding="utf-8") as csvfile:
        for row in DictReader(csvfile, fieldnames=timezone_names):
            zone_name = row["zone_name"]
            if zone_name in seen:
                continue
            seen.add(zone_name)
            unique_rows.append({column: row[column] for column in load_columns})
    return unique_rows


def main():
    """Join pytz's zone names with the CSV country codes and print a summary.

    Loads the zone/country rows via ``load_tz_file()``, matches them against
    ``pytz.all_timezones``, and prints the number of merged zones, the first
    20 merged records, and a per-region count.
    """
    # Map zone name -> country code, dropping rows with no country_code
    # (like Etc, GMT, etc). Only timezones are loaded here, not countries.
    country_code_by_zone = {
        row["zone_name"]: row["country_code"]
        for row in load_tz_file()
        if row["country_code"]
    }

    # Joining on (region, city) is the same as joining on the full zone name,
    # because the name is just "region/city" split at its first "/". A dict
    # lookup therefore replaces the nested-loop join.
    timezones = []
    for zone_name in pytz.all_timezones:
        region, _, city = zone_name.partition("/")
        # drop zones with no city (like UTC, GMT, etc) and the 'Etc' region
        if not city or region == "Etc":
            continue
        country_code = country_code_by_zone.get(zone_name)
        if country_code is None:
            continue
        timezones.append({
            "region": region,
            "city": city,
            "country_code": country_code,
        })

    # print merged data
    print(f"Merged timezones: {len(timezones)}")
    print(timezones[:20])

    # Count per region in one pass and print in sorted order, so the output
    # is the same from run to run.
    region_counts = {}
    for row in timezones:
        region_counts[row["region"]] = region_counts.get(row["region"], 0) + 1
    for region, count in sorted(region_counts.items()):
        print(f"{count} merged in {region}")


if __name__ == "__main__":
    main()