feat: Implement composite action for Python environment setup and refactor test workflow to utilize it
This commit is contained in:
111
.gitea/actions/setup-python-env/action.yml
Normal file
111
.gitea/actions/setup-python-env/action.yml
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
# Composite action that prepares a CI job for the Python test suites.
#
# Steps, in order:
#   1. Install the requested Python version.
#   2. Point apt and the job environment at the apt caching proxy.
#   3. Install each listed pip requirement file.
#   4. Optionally install Playwright browsers.
#   5. Optionally wait for Postgres and run scripts/setup_database.py
#      (dry run first when db-dry-run is "true").
#
# The database wait step reads DATABASE_HOST, DATABASE_PORT,
# DATABASE_SUPERUSER, DATABASE_SUPERUSER_PASSWORD and DATABASE_SUPERUSER_DB
# from the environment, so the calling job must define them whenever
# run-db-setup is "true".
# NOTE(review): scripts/setup_database.py presumably reads further DATABASE_*
# settings from the same environment - confirm against the script.
name: Setup Python Environment
description: Configure Python, proxies, dependencies, and optional database setup for CI jobs.
author: CalMiner Team
inputs:
  python-version:
    description: Python version to install.
    required: false
    default: "3.10"
  install-playwright:
    description: Install Playwright browsers when true.
    required: false
    default: "false"
  install-requirements:
    description: Space-delimited list of requirement files to install.
    required: false
    default: "requirements.txt requirements-test.txt"
  run-db-setup:
    description: Run database wait and setup scripts when true.
    required: false
    default: "true"
  db-dry-run:
    description: Execute setup script dry run before live run when true.
    required: false
    default: "true"
runs:
  using: composite
  steps:
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: ${{ inputs.python-version }}

    - name: Configure apt proxy
      shell: bash
      run: |
        set -euo pipefail
        # Prefer the apt-cacher hostname; when the probe fails, switch to the
        # fixed address. The fallback address itself is not probed.
        PROXY_HOST="http://apt-cacher:3142"
        if ! curl -fsS --connect-timeout 3 "${PROXY_HOST}" >/dev/null; then
          PROXY_HOST="http://192.168.88.14:3142"
        fi
        echo "Using APT proxy ${PROXY_HOST}"
        # Export the proxy settings to every later step of the job.
        {
          echo "http_proxy=${PROXY_HOST}"
          echo "https_proxy=${PROXY_HOST}"
          echo "HTTP_PROXY=${PROXY_HOST}"
          echo "HTTPS_PROXY=${PROXY_HOST}"
        } >> "$GITHUB_ENV"
        sudo tee /etc/apt/apt.conf.d/01proxy >/dev/null <<EOF
        Acquire::http::Proxy "${PROXY_HOST}";
        Acquire::https::Proxy "${PROXY_HOST}";
        EOF

    - name: Install dependencies
      shell: bash
      env:
        # Handed over through the environment instead of being interpolated
        # into the script text, so the input value is never parsed as shell.
        REQUIREMENT_FILES: ${{ inputs.install-requirements }}
      run: |
        set -euo pipefail
        # Unquoted on purpose: the input is a whitespace-delimited list.
        # An empty input installs nothing.
        for requirement in ${REQUIREMENT_FILES:-}; do
          if [ ! -f "${requirement}" ]; then
            echo "Requirement file ${requirement} not found" >&2
            exit 1
          fi
          # "python -m pip" binds the install to the interpreter set up above.
          python -m pip install -r "${requirement}"
        done

    - name: Install Playwright browsers
      if: ${{ inputs.install-playwright == 'true' }}
      shell: bash
      run: |
        set -euo pipefail
        python -m playwright install --with-deps

    - name: Wait for database service
      if: ${{ inputs.run-db-setup == 'true' }}
      shell: bash
      run: |
        set -euo pipefail
        python - <<'PY'
        import os
        import time

        import psycopg2

        # Keyword arguments let psycopg2 handle quoting, so values that
        # contain spaces or quotes cannot corrupt the connection string.
        params = {
            "dbname": os.environ["DATABASE_SUPERUSER_DB"],
            "user": os.environ["DATABASE_SUPERUSER"],
            "password": os.environ["DATABASE_SUPERUSER_PASSWORD"],
            "host": os.environ["DATABASE_HOST"],
            "port": os.environ["DATABASE_PORT"],
        }

        # Up to 30 attempts, sleeping 2 seconds after each failed attempt.
        for _ in range(30):
            try:
                # Close the probe connection explicitly; a "with" block on a
                # psycopg2 connection ends the transaction but keeps it open.
                psycopg2.connect(**params).close()
            except psycopg2.OperationalError:
                time.sleep(2)
            else:
                break
        else:
            raise SystemExit("Postgres service did not become available")
        PY

    - name: Run database setup (dry run)
      if: ${{ inputs.run-db-setup == 'true' && inputs.db-dry-run == 'true' }}
      shell: bash
      run: |
        set -euo pipefail
        python scripts/setup_database.py \
          --ensure-database --ensure-role --ensure-schema \
          --initialize-schema --run-migrations --seed-data --dry-run -v

    - name: Run database setup
      if: ${{ inputs.run-db-setup == 'true' }}
      shell: bash
      run: |
        set -euo pipefail
        python scripts/setup_database.py \
          --ensure-database --ensure-role --ensure-schema \
          --initialize-schema --run-migrations --seed-data -v
|
||||||
@@ -5,6 +5,18 @@ jobs:
|
|||||||
tests:
|
tests:
|
||||||
name: ${{ matrix.target }} tests
|
name: ${{ matrix.target }} tests
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
env:
|
||||||
|
DATABASE_DRIVER: postgresql
|
||||||
|
DATABASE_HOST: postgres
|
||||||
|
DATABASE_PORT: "5432"
|
||||||
|
DATABASE_NAME: calminer_ci
|
||||||
|
DATABASE_USER: calminer
|
||||||
|
DATABASE_PASSWORD: secret
|
||||||
|
DATABASE_SCHEMA: public
|
||||||
|
DATABASE_SUPERUSER: calminer
|
||||||
|
DATABASE_SUPERUSER_PASSWORD: secret
|
||||||
|
DATABASE_SUPERUSER_DB: calminer_ci
|
||||||
|
DATABASE_URL: postgresql+psycopg2://calminer:secret@postgres:5432/calminer_ci
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
@@ -24,108 +36,11 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
- name: Set up Python
|
- name: Prepare Python environment
|
||||||
uses: actions/setup-python@v5
|
uses: ./.gitea/actions/setup-python-env
|
||||||
with:
|
with:
|
||||||
python-version: "3.10"
|
install-playwright: ${{ matrix.target == 'e2e' }}
|
||||||
- name: Configure apt proxy
|
|
||||||
run: |
|
|
||||||
set -euo pipefail
|
|
||||||
PROXY_HOST="http://apt-cacher:3142"
|
|
||||||
if ! curl -fsS --connect-timeout 3 "${PROXY_HOST}" >/dev/null; then
|
|
||||||
PROXY_HOST="http://192.168.88.14:3142"
|
|
||||||
fi
|
|
||||||
echo "Using APT proxy ${PROXY_HOST}"
|
|
||||||
echo "http_proxy=${PROXY_HOST}" >> "$GITHUB_ENV"
|
|
||||||
echo "https_proxy=${PROXY_HOST}" >> "$GITHUB_ENV"
|
|
||||||
echo "HTTP_PROXY=${PROXY_HOST}" >> "$GITHUB_ENV"
|
|
||||||
echo "HTTPS_PROXY=${PROXY_HOST}" >> "$GITHUB_ENV"
|
|
||||||
sudo tee /etc/apt/apt.conf.d/01proxy >/dev/null <<EOF
|
|
||||||
Acquire::http::Proxy "${PROXY_HOST}";
|
|
||||||
Acquire::https::Proxy "${PROXY_HOST}";
|
|
||||||
EOF
|
|
||||||
# - name: Cache pip
|
|
||||||
# uses: actions/cache@v4
|
|
||||||
# with:
|
|
||||||
# path: ~/.cache/pip
|
|
||||||
# key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt', 'requirements-test.txt') }}
|
|
||||||
# restore-keys: |
|
|
||||||
# ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
|
|
||||||
# ${{ runner.os }}-pip-
|
|
||||||
- name: Install dependencies
|
|
||||||
run: |
|
|
||||||
pip install -r requirements.txt
|
|
||||||
pip install -r requirements-test.txt
|
|
||||||
- name: Install Playwright browsers
|
|
||||||
if: ${{ matrix.target == 'e2e' }}
|
|
||||||
run: |
|
|
||||||
python -m playwright install --with-deps
|
|
||||||
- name: Wait for database service
|
|
||||||
env:
|
|
||||||
DATABASE_DRIVER: postgresql
|
|
||||||
DATABASE_HOST: postgres
|
|
||||||
DATABASE_PORT: "5432"
|
|
||||||
DATABASE_NAME: calminer_ci
|
|
||||||
DATABASE_USER: calminer
|
|
||||||
DATABASE_PASSWORD: secret
|
|
||||||
DATABASE_SCHEMA: public
|
|
||||||
DATABASE_SUPERUSER: calminer
|
|
||||||
DATABASE_SUPERUSER_PASSWORD: secret
|
|
||||||
DATABASE_SUPERUSER_DB: calminer_ci
|
|
||||||
run: |
|
|
||||||
python - <<'PY'
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
|
|
||||||
import psycopg2
|
|
||||||
|
|
||||||
dsn = (
|
|
||||||
f"dbname={os.environ['DATABASE_SUPERUSER_DB']} "
|
|
||||||
f"user={os.environ['DATABASE_SUPERUSER']} "
|
|
||||||
f"password={os.environ['DATABASE_SUPERUSER_PASSWORD']} "
|
|
||||||
f"host={os.environ['DATABASE_HOST']} "
|
|
||||||
f"port={os.environ['DATABASE_PORT']}"
|
|
||||||
)
|
|
||||||
|
|
||||||
for attempt in range(30):
|
|
||||||
try:
|
|
||||||
with psycopg2.connect(dsn):
|
|
||||||
break
|
|
||||||
except psycopg2.OperationalError:
|
|
||||||
time.sleep(2)
|
|
||||||
else:
|
|
||||||
raise SystemExit("Postgres service did not become available")
|
|
||||||
PY
|
|
||||||
- name: Run database setup (dry run)
|
|
||||||
env:
|
|
||||||
DATABASE_DRIVER: postgresql
|
|
||||||
DATABASE_HOST: postgres
|
|
||||||
DATABASE_PORT: "5432"
|
|
||||||
DATABASE_NAME: calminer_ci
|
|
||||||
DATABASE_USER: calminer
|
|
||||||
DATABASE_PASSWORD: secret
|
|
||||||
DATABASE_SCHEMA: public
|
|
||||||
DATABASE_SUPERUSER: calminer
|
|
||||||
DATABASE_SUPERUSER_PASSWORD: secret
|
|
||||||
DATABASE_SUPERUSER_DB: calminer_ci
|
|
||||||
run: python scripts/setup_database.py --ensure-database --ensure-role --ensure-schema --initialize-schema --run-migrations --seed-data --dry-run -v
|
|
||||||
- name: Run database setup
|
|
||||||
env:
|
|
||||||
DATABASE_DRIVER: postgresql
|
|
||||||
DATABASE_HOST: postgres
|
|
||||||
DATABASE_PORT: "5432"
|
|
||||||
DATABASE_NAME: calminer_ci
|
|
||||||
DATABASE_USER: calminer
|
|
||||||
DATABASE_PASSWORD: secret
|
|
||||||
DATABASE_SCHEMA: public
|
|
||||||
DATABASE_SUPERUSER: calminer
|
|
||||||
DATABASE_SUPERUSER_PASSWORD: secret
|
|
||||||
DATABASE_SUPERUSER_DB: calminer_ci
|
|
||||||
run: python scripts/setup_database.py --ensure-database --ensure-role --ensure-schema --initialize-schema --run-migrations --seed-data -v
|
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
env:
|
|
||||||
DATABASE_URL: postgresql+psycopg2://calminer:secret@postgres:5432/calminer_ci
|
|
||||||
DATABASE_SCHEMA: public
|
|
||||||
run: |
|
run: |
|
||||||
if [ "${{ matrix.target }}" = "unit" ]; then
|
if [ "${{ matrix.target }}" = "unit" ]; then
|
||||||
pytest tests/unit
|
pytest tests/unit
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
# 14 Testing, CI and Quality Assurance
|
# Testing, CI and Quality Assurance
|
||||||
|
|
||||||
This chapter centralizes the project's testing strategy, CI configuration, and quality targets.
|
This chapter centralizes the project's testing strategy, CI configuration, and quality targets.
|
||||||
|
|
||||||
@@ -37,7 +37,7 @@ CalMiner uses a combination of unit, integration, and end-to-end tests to ensure
|
|||||||
|
|
||||||
Organize tests under the `tests/` directory mirroring the application structure:
|
Organize tests under the `tests/` directory mirroring the application structure:
|
||||||
|
|
||||||
````text
|
```text
|
||||||
tests/
|
tests/
|
||||||
unit/
|
unit/
|
||||||
test_<module>.py
|
test_<module>.py
|
||||||
@@ -45,7 +45,7 @@ tests/
|
|||||||
test_<flow>.py
|
test_<flow>.py
|
||||||
fixtures/
|
fixtures/
|
||||||
conftest.py
|
conftest.py
|
||||||
```python
|
```
|
||||||
|
|
||||||
### Fixtures and Test Data
|
### Fixtures and Test Data
|
||||||
|
|
||||||
@@ -116,3 +116,103 @@ pytest tests/e2e/ --headed
|
|||||||
- Stop, remove, and relaunch the `calminer` container exposing port 8000.
|
- Stop, remove, and relaunch the `calminer` container exposing port 8000.
|
||||||
|
|
||||||
When adding new workflows, mirror this structure to ensure secrets, caching, and deployment steps remain aligned with the production environment.
|
When adding new workflows, mirror this structure to ensure secrets, caching, and deployment steps remain aligned with the production environment.
|
||||||
|
|
||||||
|
## CI Owner Coordination Notes
|
||||||
|
|
||||||
|
### Key Findings
|
||||||
|
|
||||||
|
- Self-hosted runner: ASUS System Product Name chassis with AMD Ryzen 7 7700X (8 physical cores / 16 threads) and 63.2 GB usable RAM; `act_runner` configuration not overridden, so only one workflow job runs concurrently today.
|
||||||
|
- Unit test matrix job: completes 117 pytest cases in roughly 4.1 seconds after Postgres spins up; Docker services consume ~150 MB for `postgres:16-alpine`, with minimal sustained CPU load once tests begin.
|
||||||
|
- End-to-end matrix job: `pytest tests/e2e` averages 21‑22 seconds of execution, but a cold run downloads ~179 MB of apt packages plus ~470 MB of Playwright browser bundles (Chromium, Firefox, WebKit, FFmpeg), exceeding 650 MB network transfer and adding several gigabytes of disk writes if caches are absent.
|
||||||
|
- Both jobs reuse existing Python package caches when available; absent a shared cache service, repeated Playwright installs remain the dominant cost driver for cold executions.
|
||||||
|
|
||||||
|
### Open Questions
|
||||||
|
|
||||||
|
- Can we raise the runner concurrency above the default single job, or provision an additional runner, so the test matrix can execute without serializing queued workflows?
|
||||||
|
- Is there a central cache or artifact service available for Python wheels and Playwright browser bundles to avoid ~650 MB downloads on cold starts?
|
||||||
|
- Are we permitted to bake Playwright browsers into the base runner image, or should we pursue a shared cache/proxy solution instead?
|
||||||
|
|
||||||
|
### Outreach Draft
|
||||||
|
|
||||||
|
```text
|
||||||
|
Subject: CalMiner CI parallelization support
|
||||||
|
|
||||||
|
Hi <CI Owner>,
|
||||||
|
|
||||||
|
We recently updated the CalMiner test workflow to fan out unit and Playwright E2E suites in parallel. While validating the change, we gathered the following:
|
||||||
|
|
||||||
|
- Runner host: ASUS System Product Name with AMD Ryzen 7 7700X (8 cores / 16 threads), ~63 GB RAM, default `act_runner` concurrency (1 job at a time).
|
||||||
|
- Unit job finishes in ~4.1 s once Postgres is ready; light CPU and network usage.
|
||||||
|
- E2E job finishes in ~22 s, but a cold run pulls ~179 MB of apt packages plus ~470 MB of Playwright browser payloads (>650 MB download, several GB disk writes) because we do not have a shared cache yet.
|
||||||
|
|
||||||
|
To move forward, could you help with the following?
|
||||||
|
|
||||||
|
1. Confirm whether we can raise the runner concurrency limit or provision an additional runner so parallel jobs do not queue behind one another.
|
||||||
|
2. Let us know if a central cache (Artifactory, Nexus, etc.) is available for Python wheels and Playwright browser bundles, or if we should consider baking the browsers into the runner image instead.
|
||||||
|
3. Share any guidance on preferred caching or proxy solutions for large binary installs on self-hosted runners.
|
||||||
|
|
||||||
|
Once we have clarity, we can finalize the parallel rollout and update the documentation accordingly.
|
||||||
|
|
||||||
|
Thanks,
|
||||||
|
<Your Name>
|
||||||
|
```
|
||||||
|
|
||||||
|
## Workflow Optimization Opportunities
|
||||||
|
|
||||||
|
### `test.yml`
|
||||||
|
|
||||||
|
- Run the apt-proxy setup once via a composite action or preconfigured runner image if additional matrix jobs are added.
|
||||||
|
- Collapse dependency installation into a single `pip install -r requirements-test.txt` call (includes base requirements) once caching is restored.
|
||||||
|
- Investigate caching or pre-baking Playwright browser binaries to eliminate >650 MB cold downloads per run.
|
||||||
|
|
||||||
|
### `build-and-push.yml`
|
||||||
|
|
||||||
|
- Skip QEMU setup or explicitly constrain Buildx to linux/amd64 to reduce startup time.
|
||||||
|
- Enable `cache-from` / `cache-to` settings (registry or `type=gha`) to reuse Docker build layers between runs.
|
||||||
|
|
||||||
|
### `deploy.yml`
|
||||||
|
|
||||||
|
- Extract deployment script into a reusable shell script or compose file to minimize inline secrets and ease multi-environment scaling.
|
||||||
|
- Add a post-deploy health check (e.g., `curl` readiness probe) before declaring success.
|
||||||
|
|
||||||
|
### Priority Overview
|
||||||
|
|
||||||
|
1. Restore shared caching for Python wheels and Playwright browsers once infrastructure exposes the cache service (highest impact on runtime and bandwidth; requires coordination with CI owners).
|
||||||
|
2. Enable Docker layer caching in `build-and-push.yml` to shorten build cycles (medium effort, immediate benefit to release workflows).
|
||||||
|
3. Add post-deploy health verification to `deploy.yml` (low effort, improves confidence in automation).
|
||||||
|
4. Streamline redundant setup steps in `test.yml` (medium effort once cache strategy is in place; consider composite actions or base image updates).
|
||||||
|
|
||||||
|
### Setup Consolidation Opportunities
|
||||||
|
|
||||||
|
- `Run Tests` matrix jobs each execute the apt proxy configuration, pip installs, database wait, and setup scripts. A composite action or shell script wrapper could centralize these routines and parameterize target-specific behavior (unit vs e2e) to avoid copy/paste maintenance as additional jobs (lint, type check) are introduced.
|
||||||
|
- Both the test and build workflows perform a `checkout` step; while unavoidable per workflow, shared git submodules or sparse checkout rules could be encapsulated in a composite action to keep options consistent.
|
||||||
|
- The database setup script currently runs twice (dry-run and live) for every matrix leg. Evaluate whether the dry-run remains necessary once migrations stabilize; if retained, consider adding an environment variable toggle to skip redundant seed operations for read-only suites (e.g., lint).
|
||||||
|
|
||||||
|
### Proposed Shared Setup Action
|
||||||
|
|
||||||
|
- Location: `.gitea/actions/setup-python-env/action.yml` (composite action).
|
||||||
|
- Inputs:
|
||||||
|
- `python-version` (default `3.10`): forwarded to `actions/setup-python`.
|
||||||
|
- `install-playwright` (default `false`): when `true`, run `python -m playwright install --with-deps`.
|
||||||
|
- `install-requirements` (default `requirements.txt requirements-test.txt`): space-delimited list of requirement files; each one is installed in turn with `pip install -r <file>`.
|
||||||
|
- `run-db-setup` (default `true`): toggles database wait + setup scripts.
|
||||||
|
- `db-dry-run` (default `true`): controls whether the dry-run invocation executes.
|
||||||
|
- Steps encapsulated:
|
||||||
|
1. Set up Python via `actions/setup-python@v5` using provided version.
|
||||||
|
2. Configure apt proxy via shared shell snippet (probes `http://apt-cacher:3142` and falls back to `http://192.168.88.14:3142` when the probe fails).
|
||||||
|
3. Iterate over requirement files and execute `pip install -r <file>`.
|
||||||
|
4. If `install-playwright == true`, install browsers.
|
||||||
|
5. If `run-db-setup == true`, run the wait-for-Postgres python snippet and call `scripts/setup_database.py`, honoring `db-dry-run` toggle.
|
||||||
|
- Usage sketch (in `test.yml`):
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
- name: Prepare Python environment
|
||||||
|
uses: ./.gitea/actions/setup-python-env
|
||||||
|
with:
|
||||||
|
install-playwright: ${{ matrix.target == 'e2e' }}
|
||||||
|
db-dry-run: true
|
||||||
|
```
|
||||||
|
|
||||||
|
- Benefits: centralizes proxy logic and dependency installs, reduces duplication across matrix jobs, and keeps future lint/type-check jobs lightweight by disabling database setup.
|
||||||
|
- Implementation status: action available at `.gitea/actions/setup-python-env` and consumed by `test.yml`; extend to additional workflows as they adopt the shared routine.
|
||||||
|
- Obsolete steps removed: individual apt proxy, dependency install, Playwright, and database setup commands pruned from `test.yml` once the composite action was integrated.
|
||||||
|
|||||||
Reference in New Issue
Block a user