feat: implement video job management with retry and delete functionality, enhance video generation status tracking

Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
2026-04-29 18:27:59 +02:00
parent d5a94947de
commit 37edef716a
10 changed files with 479 additions and 95 deletions
+7
View File
@@ -114,6 +114,13 @@ def _run_migrations(conn: duckdb.DuckDBPyConnection) -> None:
conn.execute("""
ALTER TABLE models_cache ADD COLUMN IF NOT EXISTS output_modalities VARCHAR
""")
# Migration: add video job request params + generation type
conn.execute("""
ALTER TABLE generated_videos ADD COLUMN IF NOT EXISTS request_params VARCHAR
""")
conn.execute("""
ALTER TABLE generated_videos ADD COLUMN IF NOT EXISTS generation_type VARCHAR DEFAULT 'text_to_video'
""")
_seed_admin(conn)
+9 -1
View File
@@ -5,7 +5,9 @@ from .routers import ai
from .routers import generate
from .routers import images
from .routers import models
from .db import close_db, init_db
from .db import close_db, get_conn, get_write_lock, init_db
from .services.video_worker import run_worker
import asyncio
import os
from contextlib import asynccontextmanager
@@ -19,7 +21,13 @@ load_dotenv()
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan: initialise the DB and run the video worker.

    Startup: opens the database, then launches the background video-job
    worker as an asyncio task sharing the app's connection and write lock.
    Shutdown: cancels the worker, waits for it to unwind, then closes the DB.
    """
    init_db()
    # Worker runs for the whole app lifetime; holds the same DuckDB
    # connection and asyncio write lock used by the request handlers.
    worker_task = asyncio.create_task(run_worker(get_conn(), get_write_lock()))
    yield
    # Graceful shutdown: cancel and await so the worker finishes its
    # current tick / raises CancelledError before the DB is closed.
    worker_task.cancel()
    try:
        await worker_task
    except asyncio.CancelledError:
        # Expected result of cancel(); suppress so shutdown continues.
        pass
    close_db()
+40
View File
@@ -185,3 +185,43 @@ async def admin_mark_timed_out(_: dict = Depends(require_admin)) -> dict[str, in
conn = get_conn()
count = mark_timed_out_video_jobs(conn, timeout_minutes=120)
return {"timed_out": count}
@router.post("/videos/{job_id}/retry", status_code=200)
async def admin_retry_video_job(job_id: str, _: dict = Depends(require_admin)) -> dict[str, str]:
"""Reset a failed or cancelled video job back to 'queued' for reprocessing."""
conn = get_conn()
lock = get_write_lock()
now = datetime.now(timezone.utc)
async with lock:
row = conn.execute(
"SELECT status FROM generated_videos WHERE id = ?", [job_id]
).fetchone()
if row is None:
from fastapi import HTTPException
raise HTTPException(status_code=404, detail="Job not found")
if row[0] not in ("failed", "cancelled"):
from fastapi import HTTPException
raise HTTPException(
status_code=400, detail=f"Cannot retry job with status '{row[0]}'")
conn.execute(
"UPDATE generated_videos SET status = 'queued', updated_at = ? WHERE id = ?",
[now, job_id],
)
return {"status": "ok", "job_id": job_id}
@router.delete("/videos/{job_id}", status_code=200)
async def admin_delete_video_job(job_id: str, _: dict = Depends(require_admin)) -> dict[str, str]:
"""Permanently delete a video job record."""
conn = get_conn()
lock = get_write_lock()
async with lock:
row = conn.execute(
"SELECT id FROM generated_videos WHERE id = ?", [job_id]
).fetchone()
if row is None:
from fastapi import HTTPException
raise HTTPException(status_code=404, detail="Job not found")
conn.execute("DELETE FROM generated_videos WHERE id = ?", [job_id])
return {"status": "ok", "job_id": job_id}
+30 -73
View File
@@ -1,4 +1,5 @@
"""Generate router: text, image, video, and image-to-video generation."""
import json
from datetime import datetime, timezone
import httpx
async def generate_video(
    body: VideoRequest,
    current_user: dict = Depends(get_current_user),
) -> VideoResponse:
    """Queue a text-to-video generation job for background processing.

    The row is inserted with status 'queued'; the background worker submits
    it to OpenRouter later and advances the status from there.

    Returns:
        VideoResponse with status 'queued' and the local db_id; the provider
        job id is empty until the worker submits the job.
    """
    user_id = current_user.get("id") or current_user.get("sub")
    # Naive UTC timestamp, matching the rest of the DB layer.
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    # Persist the full request so the worker (and admin retries) can submit it.
    request_params = json.dumps({
        "model": body.model,
        "prompt": body.prompt,
        "duration_seconds": body.duration_seconds,
        "aspect_ratio": body.aspect_ratio,
        "resolution": body.resolution,
    })
    db_id = None
    async with get_write_lock():
        conn = get_conn()
        row = conn.execute(
            """INSERT INTO generated_videos
                   (user_id, job_id, model_id, prompt, status, request_params, generation_type, created_at, updated_at)
               VALUES (?, ?, ?, ?, 'queued', ?, 'text_to_video', ?, ?) RETURNING id""",
            [user_id, "", body.model, body.prompt, request_params, now, now],
        ).fetchone()
        if row:
            db_id = str(row[0])
    # No provider job id / polling URL yet — the worker fills those in.
    return VideoResponse(
        id="",
        db_id=db_id,
        model=body.model,
        status="queued",
    )
async def generate_video_from_image(
    body: VideoFromImageRequest,
    current_user: dict = Depends(get_current_user),
) -> VideoResponse:
    """Queue an image-to-video generation job for background processing.

    Mirrors generate_video, but records generation_type 'image_to_video' and
    includes the source image URL in the stored request parameters.

    Returns:
        VideoResponse with status 'queued' and the local db_id; the provider
        job id is empty until the worker submits the job.
    """
    user_id = current_user.get("id") or current_user.get("sub")
    # Naive UTC timestamp, matching the rest of the DB layer.
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    # Persist the full request so the worker (and admin retries) can submit it.
    request_params = json.dumps({
        "model": body.model,
        "image_url": body.image_url,
        "prompt": body.prompt,
        "duration_seconds": body.duration_seconds,
        "aspect_ratio": body.aspect_ratio,
        "resolution": body.resolution,
    })
    db_id = None
    async with get_write_lock():
        conn = get_conn()
        row = conn.execute(
            """INSERT INTO generated_videos
                   (user_id, job_id, model_id, prompt, status, request_params, generation_type, created_at, updated_at)
               VALUES (?, ?, ?, ?, 'queued', ?, 'image_to_video', ?, ?) RETURNING id""",
            [user_id, "", body.model, body.prompt, request_params, now, now],
        ).fetchone()
        if row:
            db_id = str(row[0])
    # No provider job id / polling URL yet — the worker fills those in.
    return VideoResponse(
        id="",
        db_id=db_id,
        model=body.model,
        status="queued",
    )
+158
View File
@@ -0,0 +1,158 @@
"""Background worker: processes queued/processing video generation jobs."""
import asyncio
import json
import logging
from datetime import datetime, timezone
import duckdb
from . import openrouter
from .models import mark_timed_out_video_jobs
logger = logging.getLogger(__name__)
# Interval between worker ticks (seconds)
WORKER_INTERVAL = 15
# Jobs to process per tick (prevents unbounded bursts)
BATCH_SIZE = 5
async def process_queued_jobs(conn: duckdb.DuckDBPyConnection, lock: asyncio.Lock) -> int:
    """Submit queued jobs to OpenRouter and transition them to 'processing'.

    Returns:
        Number of jobs successfully submitted this tick.
    """
    # NOTE(review): reads take no lock here, matching the rest of the module;
    # only writes go through the shared asyncio lock.
    rows = conn.execute(
        """SELECT id, generation_type, request_params
           FROM generated_videos
           WHERE status = 'queued' AND request_params IS NOT NULL
           ORDER BY created_at ASC
           LIMIT ?""",
        [BATCH_SIZE],
    ).fetchall()
    processed = 0
    for row in rows:
        db_id, generation_type, raw_params = str(row[0]), row[1], row[2]
        try:
            params = json.loads(raw_params)
        except (json.JSONDecodeError, TypeError):
            logger.error("Bad request_params for video job %s", db_id)
            # Fix: mark unparseable jobs failed. Leaving them 'queued' made
            # them re-selected (and re-logged) on every tick forever.
            now = datetime.now(timezone.utc).replace(tzinfo=None)
            async with lock:
                conn.execute(
                    "UPDATE generated_videos SET status = 'failed', updated_at = ? WHERE id = ?",
                    [now, db_id],
                )
            continue
        try:
            if generation_type == "image_to_video":
                result = await openrouter.generate_video_from_image(
                    model=params["model"],
                    image_url=params.get("image_url", ""),
                    prompt=params.get("prompt", ""),
                    duration_seconds=params.get("duration_seconds"),
                    aspect_ratio=params.get("aspect_ratio", "16:9"),
                    resolution=params.get("resolution"),
                )
            else:
                result = await openrouter.generate_video(
                    model=params["model"],
                    prompt=params.get("prompt", ""),
                    duration_seconds=params.get("duration_seconds"),
                    aspect_ratio=params.get("aspect_ratio", "16:9"),
                    resolution=params.get("resolution"),
                )
        except Exception as exc:
            logger.warning("OpenRouter call failed for job %s: %s", db_id, exc)
            now = datetime.now(timezone.utc).replace(tzinfo=None)
            async with lock:
                conn.execute(
                    "UPDATE generated_videos SET status = 'failed', updated_at = ? WHERE id = ?",
                    [now, db_id],
                )
            continue
        job_id = result.get("id", "")
        polling_url = result.get("polling_url")
        new_status = result.get("status", "processing")
        # Normalise terminal statuses returned immediately (rare but possible)
        if new_status not in ("queued", "processing", "completed", "failed", "cancelled"):
            new_status = "processing"
        urls = result.get("unsigned_urls") or result.get("video_urls")
        video_url = (urls or [None])[0]
        now = datetime.now(timezone.utc).replace(tzinfo=None)
        async with lock:
            conn.execute(
                """UPDATE generated_videos
                   SET job_id = ?, polling_url = ?, status = ?, video_url = ?, updated_at = ?
                   WHERE id = ?""",
                [job_id, polling_url, new_status, video_url, now, db_id],
            )
        processed += 1
        # Fix: original format "%s%s" ran the id and status together.
        logger.info("Video job %s -> %s (provider id: %s)",
                    db_id, new_status, job_id)
    return processed
async def process_processing_jobs(conn: duckdb.DuckDBPyConnection, lock: asyncio.Lock) -> int:
    """Poll in-progress jobs and update terminal ones in the database.

    Returns:
        Number of jobs whose status changed this tick.
    """
    rows = conn.execute(
        """SELECT id, polling_url
           FROM generated_videos
           WHERE status = 'processing' AND polling_url IS NOT NULL
           ORDER BY updated_at ASC
           LIMIT ?""",
        [BATCH_SIZE],
    ).fetchall()
    updated = 0
    for row in rows:
        db_id, polling_url = str(row[0]), row[1]
        try:
            result = await openrouter.poll_video_status(polling_url)
        except Exception as exc:
            # Transient poll failure: leave as 'processing', retry next tick.
            logger.warning("Polling failed for job %s: %s", db_id, exc)
            continue
        job_status = result.get("status", "processing")
        # Fix: also treat 'cancelled' as terminal — the submit path already
        # recognises it; without this, cancelled jobs were polled until the
        # 120-minute timeout expired them.
        if job_status not in ("completed", "failed", "cancelled"):
            continue  # still in-progress — check again next tick
        urls = result.get("unsigned_urls") or result.get("video_urls")
        video_url = (urls or [None])[0]
        now = datetime.now(timezone.utc).replace(tzinfo=None)
        async with lock:
            conn.execute(
                """UPDATE generated_videos
                   SET status = ?, video_url = ?, updated_at = ?
                   WHERE id = ?""",
                [job_status, video_url, now, db_id],
            )
        updated += 1
        # Fix: original format "%s%s" ran the id and status together.
        logger.info("Video job %s -> %s", db_id, job_status)
    return updated
async def worker_tick(conn: duckdb.DuckDBPyConnection, lock: asyncio.Lock) -> None:
    """One pass of the worker: submit queued jobs, poll running ones, expire stale ones."""
    submitted = await process_queued_jobs(conn, lock)
    polled = await process_processing_jobs(conn, lock)
    # Timeout sweep mutates rows, so it runs under the shared write lock.
    async with lock:
        expired = mark_timed_out_video_jobs(conn, timeout_minutes=120)
    # Stay quiet on idle ticks; only log when something actually happened.
    if submitted or polled or expired:
        logger.info(
            "Worker tick: submitted=%d polled=%d timed_out=%d",
            submitted, polled, expired,
        )
async def run_worker(conn: duckdb.DuckDBPyConnection, lock: asyncio.Lock) -> None:
    """Infinite loop: run a worker tick every WORKER_INTERVAL seconds.

    Designed to run as an asyncio task for the app's lifetime; it exits only
    when cancelled. Unexpected errors are logged and the loop keeps running.
    """
    logger.info("Video worker started (interval=%ds)", WORKER_INTERVAL)
    while True:
        try:
            await worker_tick(conn, lock)
        except asyncio.CancelledError:
            # Cancelled mid-tick: log and exit cleanly.
            # NOTE(review): cancellation during the sleep below is not caught
            # here and propagates to the awaiting task — confirm intentional.
            logger.info("Video worker stopped.")
            return
        except Exception as exc:
            # Never let one bad tick kill the worker; log and carry on.
            logger.exception("Unexpected error in video worker: %s", exc)
        await asyncio.sleep(WORKER_INTERVAL)