feat: enhance model caching and output modalities handling

- Updated `refresh_models_cache` to include output modalities in the models cache.
- Added `get_model_output_modalities` function to retrieve output modalities for a specific model.
- Modified tests to cover new functionality for output modalities.
- Updated OpenRouter video generation functions to support audio generation and improved error handling.
- Enhanced dashboard to display generated images and videos.
- Refactored frontend templates to accommodate new data structures for generated content.
- Adjusted tests to validate changes in model handling and dashboard rendering.

Co-authored-by: Copilot <copilot@github.com>
2026-04-29 15:20:48 +02:00
parent 3d32e6df74
commit 712c556032
15 changed files with 618 additions and 219 deletions
@@ -91,16 +91,28 @@ async def refresh_models_cache(conn: duckdb.DuckDBPyConnection) -> int:
        model_id = m.get("id", "")
        if not model_id:
            continue
+        # Full output_modalities array from architecture (for proper modalities param in image gen)
+        architecture = m.get("architecture") or {}
+        raw_output_modalities: list | None = (
+            architecture.get("output_modalities") or m.get("output_modalities")
+        )
+        output_modalities_json: str | None = (
+            json.dumps([_normalize_modality(str(v))
+                       for v in raw_output_modalities])
+            if isinstance(raw_output_modalities, list)
+            else None
+        )
        conn.execute(
            """
-            INSERT INTO models_cache (model_id, name, modality, context_length, pricing, fetched_at)
-            VALUES (?, ?, ?, ?, ?, ?)
+            INSERT INTO models_cache (model_id, name, modality, context_length, pricing, fetched_at, output_modalities)
+            VALUES (?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT (model_id) DO UPDATE SET
                name = excluded.name,
                modality = excluded.modality,
                context_length = excluded.context_length,
                pricing = excluded.pricing,
-                fetched_at = excluded.fetched_at
+                fetched_at = excluded.fetched_at,
+                output_modalities = excluded.output_modalities
            """,
            [
                model_id,
@@ -109,6 +121,7 @@ async def refresh_models_cache(conn: duckdb.DuckDBPyConnection) -> int:
                m.get("context_length"),
                json.dumps(pricing) if pricing else None,
                now,
+                output_modalities_json,
            ],
        )
        count += 1
@@ -168,3 +181,20 @@ def get_cached_models(
            "pricing": pricing,
        })
    return result
+
+
+def get_model_output_modalities(
+    conn: duckdb.DuckDBPyConnection,
+    model_id: str,
+) -> list[str]:
+    """Return output_modalities list for a model; empty list if not found."""
+    row = conn.execute(
+        "SELECT output_modalities FROM models_cache WHERE model_id = ?",
+        [model_id],
+    ).fetchone()
+    if not row or not row[0]:
+        return []
+    try:
+        return json.loads(row[0])
+    except (json.JSONDecodeError, TypeError):
+        return []
@@ -95,8 +95,9 @@ async def generate_video(
    duration_seconds: int | None = None,
    aspect_ratio: str = "16:9",
    resolution: str | None = None,
+    generate_audio: bool | None = None,
 ) -> dict[str, Any]:
-    """Request text-to-video generation via OpenRouter."""
+    """Request text-to-video generation via OpenRouter POST /videos."""
    base_url = os.getenv("OPENROUTER_BASE_URL", OPENROUTER_BASE_URL)
    payload: dict[str, Any] = {
        "model": model,
@@ -104,9 +105,12 @@ async def generate_video(
        "aspect_ratio": aspect_ratio,
    }
    if duration_seconds is not None:
-        payload["duration_seconds"] = duration_seconds
+        # API uses 'duration' not 'duration_seconds'
+        payload["duration"] = duration_seconds
    if resolution is not None:
        payload["resolution"] = resolution
+    if generate_audio is not None:
+        payload["generate_audio"] = generate_audio
    async with httpx.AsyncClient(timeout=120) as client:
        resp = client.build_request(
            "POST", f"{base_url}/videos", headers=_headers(), json=payload
@@ -123,19 +127,31 @@ async def generate_video_from_image(
    duration_seconds: int | None = None,
    aspect_ratio: str = "16:9",
    resolution: str | None = None,
+    generate_audio: bool | None = None,
 ) -> dict[str, Any]:
-    """Request image-to-video generation via OpenRouter."""
+    """Request image-to-video generation via OpenRouter POST /videos.
+
+    Uses frame_images array with first_frame as per OpenRouter API spec.
+    """
    base_url = os.getenv("OPENROUTER_BASE_URL", OPENROUTER_BASE_URL)
    payload: dict[str, Any] = {
        "model": model,
-        "image_url": image_url,
        "prompt": prompt,
        "aspect_ratio": aspect_ratio,
+        "frame_images": [
+            {
+                "type": "image_url",
+                "image_url": {"url": image_url},
+                "frame_type": "first_frame",
+            }
+        ],
    }
    if duration_seconds is not None:
-        payload["duration_seconds"] = duration_seconds
+        payload["duration"] = duration_seconds
    if resolution is not None:
        payload["resolution"] = resolution
+    if generate_audio is not None:
+        payload["generate_audio"] = generate_audio
    async with httpx.AsyncClient(timeout=120) as client:
        resp = client.build_request(
            "POST", f"{base_url}/videos", headers=_headers(), json=payload
@@ -154,6 +170,18 @@ async def poll_video_status(polling_url: str) -> dict[str, Any]:
        return response.json()


+async def list_video_models() -> list[dict[str, Any]]:
+    """Return video generation models from the dedicated /videos/models endpoint."""
+    base_url = os.getenv("OPENROUTER_BASE_URL", OPENROUTER_BASE_URL)
+    async with httpx.AsyncClient(timeout=15) as client:
+        resp = client.build_request(
+            "GET", f"{base_url}/videos/models", headers=_headers()
+        )
+        response = await client.send(resp)
+        response.raise_for_status()
+        return response.json().get("data", [])
+
+
 async def generate_image_chat(
    model: str,
    prompt: str,