feat: enhance model caching and output modalities handling

- Updated `refresh_models_cache` to include output modalities in the models cache.
- Added `get_model_output_modalities` function to retrieve output modalities for a specific model.
- Modified tests to cover new functionality for output modalities.
- Updated OpenRouter video generation functions to support audio generation and improved error handling.
- Enhanced dashboard to display generated images and videos.
- Refactored frontend templates to accommodate new data structures for generated content.
- Adjusted tests to validate changes in model handling and dashboard rendering.

Co-authored-by: Copilot <copilot@github.com>
2026-04-29 15:20:48 +02:00
parent 3d32e6df74
commit 712c556032
15 changed files with 618 additions and 219 deletions
@@ -18,15 +18,6 @@ FAKE_CHAT = {
    "usage": {"prompt_tokens": 5, "completion_tokens": 10, "total_tokens": 15},
 }

-FAKE_IMAGE = {
-    "id": "gen-img-1",
-    "model": "openai/dall-e-3",
-    "data": [
-        {"url": "https://example.com/image.png",
-            "revised_prompt": "A cat on the moon"},
-    ],
-}
-
 FAKE_VIDEO = {
    "id": "gen-vid-1",
    "polling_url": "https://openrouter.ai/api/v1/videos/gen-vid-1",
@@ -155,47 +146,13 @@ async def test_generate_text_upstream_error(client):
 # POST /generate/image
 # ---------------------------------------------------------------------------

-async def test_generate_image(client):
-    token = await _user_token(client)
-    with patch("app.routers.generate.openrouter.generate_image", new_callable=AsyncMock, return_value=FAKE_IMAGE):
-        resp = await client.post(
-            "/generate/image",
-            json={"model": "openai/dall-e-3", "prompt": "A cat on the moon"},
-            headers={"Authorization": f"Bearer {token}"},
-        )
-    assert resp.status_code == 200
-    data = resp.json()
-    assert data["id"] == "gen-img-1"
-    assert len(data["images"]) == 1
-    assert data["images"][0]["url"] == "https://example.com/image.png"
-    assert data["images"][0]["revised_prompt"] == "A cat on the moon"
-
-
-async def test_generate_image_unauthenticated(client):
-    resp = await client.post("/generate/image", json={"model": "openai/dall-e-3", "prompt": "Hi"})
-    assert resp.status_code == 401
-
-
-async def test_generate_image_upstream_error(client):
-    token = await _user_token(client)
-    with patch("app.routers.generate.openrouter.generate_image", new_callable=AsyncMock, side_effect=Exception("rate limit")):
-        resp = await client.post(
-            "/generate/image",
-            json={"model": "openai/dall-e-3", "prompt": "Hi"},
-            headers={"Authorization": f"Bearer {token}"},
-        )
-    assert resp.status_code == 502
-
-
-# --- Chat-based image generation (FLUX, GPT-5 Image Mini) ---
-
 FAKE_IMAGE_CHAT_FLUX = {
    "id": "gen-img-chat-1",
    "model": "black-forest-labs/flux.2-klein-4b",
    "choices": [{
        "message": {
            "role": "assistant",
-            "content": "Here is your generated image.",
+            "content": None,
            "images": [{
                "type": "image_url",
                "image_url": {"url": "data:image/png;base64,abc123"},
@@ -219,45 +176,65 @@ FAKE_IMAGE_CHAT_GPT5 = {
    }],
 }

+FAKE_IMAGE_CHAT_GEMINI = {
+    "id": "gen-img-chat-3",
+    "model": "google/gemini-2.5-flash-image",
+    "choices": [{
+        "message": {
+            "role": "assistant",
+            "content": "Here is your image.",
+            "images": [{
+                "type": "image_url",
+                "image_url": {"url": "data:image/png;base64,gemini123"},
+            }],
+        }
+    }],
+}

-async def test_generate_image_chat_flux(client):
+
+async def test_generate_image(client):
+    """All models now use generate_image_chat (chat completions endpoint)."""
    token = await _user_token(client)
-    with patch("app.routers.generate.openrouter.generate_image_chat", new_callable=AsyncMock, return_value=FAKE_IMAGE_CHAT_FLUX):
+    with patch("app.routers.generate.openrouter.generate_image_chat", new_callable=AsyncMock, return_value=FAKE_IMAGE_CHAT_GEMINI):
        resp = await client.post(
            "/generate/image",
-            json={"model": "black-forest-labs/flux.2-klein-4b",
-                  "prompt": "A sunset"},
+            json={"model": "google/gemini-2.5-flash-image",
+                  "prompt": "A cat on the moon"},
            headers={"Authorization": f"Bearer {token}"},
        )
    assert resp.status_code == 200
    data = resp.json()
-    assert data["id"] == "gen-img-chat-1"
+    assert data["id"] == "gen-img-chat-3"
    assert len(data["images"]) == 1
-    assert data["images"][0]["url"] == "data:image/png;base64,abc123"
+    assert data["images"][0]["url"] == "data:image/png;base64,gemini123"
+    assert data["images"][0]["image_id"] is not None  # stored in DB


-async def test_generate_image_chat_gpt5_image_mini(client):
+async def test_generate_image_unauthenticated(client):
+    resp = await client.post("/generate/image", json={"model": "google/gemini-2.5-flash-image", "prompt": "Hi"})
+    assert resp.status_code == 401
+
+
+async def test_generate_image_upstream_error(client):
    token = await _user_token(client)
-    with patch("app.routers.generate.openrouter.generate_image_chat", new_callable=AsyncMock, return_value=FAKE_IMAGE_CHAT_GPT5):
+    with patch("app.routers.generate.openrouter.generate_image_chat", new_callable=AsyncMock, side_effect=Exception("rate limit")):
        resp = await client.post(
            "/generate/image",
-            json={"model": "openai/gpt-5-image-mini", "prompt": "A cat"},
+            json={"model": "google/gemini-2.5-flash-image", "prompt": "Hi"},
            headers={"Authorization": f"Bearer {token}"},
        )
-    assert resp.status_code == 200
-    data = resp.json()
-    assert data["model"] == "openai/gpt-5-image-mini"
-    assert len(data["images"]) == 1
+    assert resp.status_code == 502


-async def test_generate_image_chat_with_image_config(client):
+async def test_generate_image_with_image_config(client):
+    """Passes aspect_ratio + image_size through to generate_image_chat."""
    token = await _user_token(client)
-    mock = AsyncMock(return_value=FAKE_IMAGE_CHAT_FLUX)
+    mock = AsyncMock(return_value=FAKE_IMAGE_CHAT_GEMINI)
    with patch("app.routers.generate.openrouter.generate_image_chat", mock):
        await client.post(
            "/generate/image",
            json={
-                "model": "black-forest-labs/flux.2-klein-4b",
+                "model": "google/gemini-2.5-flash-image",
                "prompt": "A landscape",
                "aspect_ratio": "16:9",
                "image_size": "2K",
@@ -267,23 +244,112 @@ async def test_generate_image_chat_with_image_config(client):
    call_kwargs = mock.call_args.kwargs
    assert call_kwargs["image_config"]["aspect_ratio"] == "16:9"
    assert call_kwargs["image_config"]["image_size"] == "2K"
-    assert call_kwargs["modalities"] == ["image"]


-async def test_generate_image_chat_unauthenticated(client):
-    resp = await client.post("/generate/image", json={"model": "flux.2-klein-4b", "prompt": "Hi"})
-    assert resp.status_code == 401
-
-
-async def test_generate_image_chat_upstream_error(client):
+async def test_generate_image_default_modalities_image_text(client):
+    """Model not in cache → default modalities = ['image', 'text']."""
    token = await _user_token(client)
-    with patch("app.routers.generate.openrouter.generate_image_chat", new_callable=AsyncMock, side_effect=Exception("timeout")):
+    mock = AsyncMock(return_value=FAKE_IMAGE_CHAT_GEMINI)
+    with patch("app.routers.generate.openrouter.generate_image_chat", mock):
+        await client.post(
+            "/generate/image",
+            json={"model": "google/gemini-2.5-flash-image", "prompt": "Hi"},
+            headers={"Authorization": f"Bearer {token}"},
+        )
+    assert mock.call_args.kwargs["modalities"] == ["image", "text"]
+
+
+async def test_generate_image_image_only_modalities_from_cache(client):
+    """Model cached with image-only output_modalities → modalities = ['image']."""
+    from app import db as db_module
+    from app.services.models import get_model_output_modalities
+    import json as _json
+    token = await _user_token(client)
+
+    # Seed cache with image-only model
+    conn = db_module.get_conn()
+    from datetime import datetime, timezone
+    now = datetime.now(timezone.utc).replace(tzinfo=None)
+    conn.execute(
+        "DELETE FROM models_cache WHERE model_id = 'black-forest-labs/flux.2-pro'"
+    )
+    conn.execute(
+        """INSERT INTO models_cache (model_id, name, modality, context_length, pricing, fetched_at, output_modalities)
+           VALUES (?, ?, ?, ?, ?, ?, ?)""",
+        ["black-forest-labs/flux.2-pro", "FLUX.2 Pro", "image", None, None, now,
+         _json.dumps(["image"])],
+    )
+
+    mock = AsyncMock(return_value=FAKE_IMAGE_CHAT_FLUX)
+    with patch("app.routers.generate.openrouter.generate_image_chat", mock):
        resp = await client.post(
            "/generate/image",
-            json={"model": "black-forest-labs/flux.2-klein-4b", "prompt": "Hi"},
+            json={"model": "black-forest-labs/flux.2-pro", "prompt": "Sky"},
+            headers={"Authorization": f"Bearer {token}"},
+        )
+    assert resp.status_code == 200
+    assert mock.call_args.kwargs["modalities"] == ["image"]
+
+
+async def test_generate_image_no_images_in_response(client):
+    """502 when model returns no images."""
+    token = await _user_token(client)
+    empty_response = {
+        "id": "gen-empty",
+        "model": "google/gemini-2.5-flash-image",
+        "choices": [{"message": {"role": "assistant", "content": "ok", "images": []}}],
+    }
+    with patch("app.routers.generate.openrouter.generate_image_chat",
+               new_callable=AsyncMock, return_value=empty_response):
+        resp = await client.post(
+            "/generate/image",
+            json={"model": "google/gemini-2.5-flash-image", "prompt": "Hi"},
            headers={"Authorization": f"Bearer {token}"},
        )
    assert resp.status_code == 502
+    assert "No images returned" in resp.json()["detail"]
+
+
+async def test_generate_image_flux(client):
+    """Flux model works correctly via chat completions."""
+    token = await _user_token(client)
+    with patch("app.routers.generate.openrouter.generate_image_chat",
+               new_callable=AsyncMock, return_value=FAKE_IMAGE_CHAT_FLUX):
+        resp = await client.post(
+            "/generate/image",
+            json={"model": "black-forest-labs/flux.2-klein-4b",
+                  "prompt": "A sunset"},
+            headers={"Authorization": f"Bearer {token}"},
+        )
+    assert resp.status_code == 200
+    data = resp.json()
+    assert data["images"][0]["url"] == "data:image/png;base64,abc123"
+
+
+async def test_generate_image_stored_in_db(client):
+    """Generated image row persists in generated_images table."""
+    from app import db as db_module
+    token = await _user_token(client)
+    with patch("app.routers.generate.openrouter.generate_image_chat",
+               new_callable=AsyncMock, return_value=FAKE_IMAGE_CHAT_GEMINI):
+        resp = await client.post(
+            "/generate/image",
+            json={"model": "google/gemini-2.5-flash-image",
+                  "prompt": "A mountain"},
+            headers={"Authorization": f"Bearer {token}"},
+        )
+    assert resp.status_code == 200
+    image_id = resp.json()["images"][0]["image_id"]
+    assert image_id is not None
+
+    row = db_module.get_conn().execute(
+        "SELECT model_id, prompt, image_data FROM generated_images WHERE id = ?",
+        [image_id],
+    ).fetchone()
+    assert row is not None
+    assert row[0] == "google/gemini-2.5-flash-image"
+    assert row[1] == "A mountain"
+    assert row[2] == "data:image/png;base64,gemini123"


 # ---------------------------------------------------------------------------