Add aspect ratio and image size options to image generation; implement chat-based image generation handling

Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
2026-04-27 19:28:04 +02:00
parent 17ae8d9477
commit 58c2cb4490
6 changed files with 211 additions and 18 deletions
+2
View File
@@ -53,6 +53,8 @@ class ImageRequest(BaseModel):
prompt: str
n: int = 1
size: str = "1024x1024"
aspect_ratio: str | None = None # e.g. "1:1", "16:9", "9:16"
image_size: str | None = None # e.g. "0.5K", "1K", "2K", "4K"
class ImageResult(BaseModel):
+56 -18
View File
@@ -58,31 +58,69 @@ async def generate_image(
_: dict = Depends(get_current_user),
) -> ImageResponse:
"""Generate images from a text prompt."""
# Detect if model uses chat completions (FLUX, GPT-5 Image Mini) vs /images/generations (DALL-E)
chat_models = {"black-forest-labs/flux.2-klein-4b",
"openai/gpt-5-image-mini"}
is_chat_model = body.model.lower() in {m.lower() for m in chat_models} or \
any(m in body.model.lower() for m in ["flux", "gpt-5-image-mini"])
try:
result = await openrouter.generate_image(
model=body.model,
prompt=body.prompt,
n=body.n,
size=body.size,
)
if is_chat_model:
image_config = {}
if body.aspect_ratio:
image_config["aspect_ratio"] = body.aspect_ratio
if body.image_size:
image_config["image_size"] = body.image_size
result = await openrouter.generate_image_chat(
model=body.model,
prompt=body.prompt,
modalities=[
"image", "text"] if "gpt-5-image-mini" in body.model.lower() else ["image"],
image_config=image_config if image_config else None,
)
else:
result = await openrouter.generate_image(
model=body.model,
prompt=body.prompt,
n=body.n,
size=body.size,
)
except Exception as exc:
raise HTTPException(
status_code=status.HTTP_502_BAD_GATEWAY, detail=f"OpenRouter error: {exc}")
try:
images = [
ImageResult(
url=item.get("url"),
b64_json=item.get("b64_json"),
revised_prompt=item.get("revised_prompt"),
if is_chat_model:
# Chat completions response: choices[0].message.images[].image_url.url
images = []
message = result.get("choices", [{}])[0].get("message", {})
for item in message.get("images", []):
img_url = item.get("image_url", {}).get("url")
images.append(ImageResult(
url=img_url,
b64_json=None,
revised_prompt=message.get("content"),
))
return ImageResponse(
id=result.get("id", ""),
model=result.get("model", body.model),
images=images,
)
else:
# /images/generations response: data[].url
images = [
ImageResult(
url=item.get("url"),
b64_json=item.get("b64_json"),
revised_prompt=item.get("revised_prompt"),
)
for item in result.get("data", [])
]
return ImageResponse(
id=result.get("id", ""),
model=result.get("model", body.model),
images=images,
)
for item in result.get("data", [])
]
return ImageResponse(
id=result.get("id", ""),
model=result.get("model", body.model),
images=images,
)
except (KeyError, TypeError) as exc:
raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY,
detail=f"Unexpected response format: {exc}")
+31
View File
@@ -139,3 +139,34 @@ async def poll_video_status(polling_url: str) -> dict[str, Any]:
response = await client.send(resp)
response.raise_for_status()
return response.json()
async def generate_image_chat(
    model: str,
    prompt: str,
    modalities: list[str] | None = None,
    image_config: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Generate images through the OpenRouter Chat Completions endpoint.

    Some models (e.g. FLUX.2 Klein 4B, GPT-5 Image Mini) emit their images
    via ``/chat/completions`` with an output-``modalities`` list rather than
    the dedicated ``/images/generations`` route.

    Args:
        model: OpenRouter model identifier.
        prompt: Text prompt, sent as a single user message.
        modalities: Requested output modalities; when omitted, defaults to
            ``["image"]`` (the image-only case, e.g. FLUX).
        image_config: Optional generation options (aspect ratio, resolution);
            attached to the payload only when non-empty.

    Returns:
        The parsed JSON response from OpenRouter.
    """
    base_url = os.getenv("OPENROUTER_BASE_URL", OPENROUTER_BASE_URL)
    body: dict[str, Any] = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        # Image-only models (FLUX) get the single-modality default;
        # multimodal models (GPT-5 Image Mini) pass their list explicitly.
        "modalities": ["image"] if modalities is None else modalities,
    }
    if image_config:
        body["image_config"] = image_config
    async with httpx.AsyncClient(timeout=120) as client:
        request = client.build_request(
            "POST", f"{base_url}/chat/completions", headers=_headers(), json=body
        )
        response = await client.send(request)
        response.raise_for_status()
        return response.json()
+99
View File
@@ -149,6 +149,105 @@ async def test_generate_image_upstream_error(client):
assert resp.status_code == 502
# --- Chat-based image generation (FLUX, GPT-5 Image Mini) ---

# Canned /chat/completions payload for an image-only model (FLUX): the
# generated image is surfaced at choices[0].message.images[].image_url.url.
FAKE_IMAGE_CHAT_FLUX = {
    "id": "gen-img-chat-1",
    "model": "black-forest-labs/flux.2-klein-4b",
    "choices": [
        {
            "message": {
                "role": "assistant",
                "content": "Here is your generated image.",
                "images": [
                    {
                        "type": "image_url",
                        "image_url": {"url": "data:image/png;base64,abc123"},
                    }
                ],
            }
        }
    ],
}

# Same response shape for a multimodal model (GPT-5 Image Mini), which also
# returns accompanying text in the message "content" field.
FAKE_IMAGE_CHAT_GPT5 = {
    "id": "gen-img-chat-2",
    "model": "openai/gpt-5-image-mini",
    "choices": [
        {
            "message": {
                "role": "assistant",
                "content": "Generated image.",
                "images": [
                    {
                        "type": "image_url",
                        "image_url": {"url": "data:image/png;base64,xyz789"},
                    }
                ],
            }
        }
    ],
}
async def test_generate_image_chat_flux(client):
    """A FLUX request is routed through the chat-completions image path."""
    token = await _user_token(client)
    fake = AsyncMock(return_value=FAKE_IMAGE_CHAT_FLUX)
    with patch("backend.app.routers.generate.openrouter.generate_image_chat", fake):
        resp = await client.post(
            "/generate/image",
            json={
                "model": "black-forest-labs/flux.2-klein-4b",
                "prompt": "A sunset",
            },
            headers={"Authorization": f"Bearer {token}"},
        )
    assert resp.status_code == 200
    payload = resp.json()
    assert payload["id"] == "gen-img-chat-1"
    assert len(payload["images"]) == 1
    assert payload["images"][0]["url"] == "data:image/png;base64,abc123"
async def test_generate_image_chat_gpt5_image_mini(client):
    """GPT-5 Image Mini responses are mapped back into ImageResponse."""
    token = await _user_token(client)
    fake = AsyncMock(return_value=FAKE_IMAGE_CHAT_GPT5)
    with patch("backend.app.routers.generate.openrouter.generate_image_chat", fake):
        resp = await client.post(
            "/generate/image",
            json={"model": "openai/gpt-5-image-mini", "prompt": "A cat"},
            headers={"Authorization": f"Bearer {token}"},
        )
    assert resp.status_code == 200
    payload = resp.json()
    assert payload["model"] == "openai/gpt-5-image-mini"
    assert len(payload["images"]) == 1
async def test_generate_image_chat_with_image_config(client):
    """aspect_ratio / image_size are forwarded to OpenRouter as image_config."""
    token = await _user_token(client)
    spy = AsyncMock(return_value=FAKE_IMAGE_CHAT_FLUX)
    with patch("backend.app.routers.generate.openrouter.generate_image_chat", spy):
        await client.post(
            "/generate/image",
            json={
                "model": "black-forest-labs/flux.2-klein-4b",
                "prompt": "A landscape",
                "aspect_ratio": "16:9",
                "image_size": "2K",
            },
            headers={"Authorization": f"Bearer {token}"},
        )
    kwargs = spy.call_args.kwargs
    assert kwargs["image_config"]["aspect_ratio"] == "16:9"
    assert kwargs["image_config"]["image_size"] == "2K"
    # FLUX is image-only, so the router requests the single "image" modality.
    assert kwargs["modalities"] == ["image"]
async def test_generate_image_chat_unauthenticated(client):
    """Without a bearer token the endpoint rejects the request with 401."""
    resp = await client.post(
        "/generate/image",
        json={"model": "flux.2-klein-4b", "prompt": "Hi"},
    )
    assert resp.status_code == 401
async def test_generate_image_chat_upstream_error(client):
    """A failing OpenRouter call surfaces as 502 Bad Gateway."""
    token = await _user_token(client)
    failing = AsyncMock(side_effect=Exception("timeout"))
    with patch("backend.app.routers.generate.openrouter.generate_image_chat", failing):
        resp = await client.post(
            "/generate/image",
            json={"model": "black-forest-labs/flux.2-klein-4b", "prompt": "Hi"},
            headers={"Authorization": f"Bearer {token}"},
        )
    assert resp.status_code == 502
# ---------------------------------------------------------------------------
# POST /generate/video
# ---------------------------------------------------------------------------
+2
View File
@@ -158,6 +158,8 @@ def generate_image():
"prompt": request.form.get("prompt", "").strip(),
"n": int(request.form.get("n", 1)),
"size": request.form.get("size", "1024x1024"),
"aspect_ratio": request.form.get("aspect_ratio", "").strip() or None,
"image_size": request.form.get("image_size", "").strip() or None,
})
if resp.status_code == 200:
result = resp.json()
@@ -21,6 +21,27 @@
<option value="512x512" {% if request.form.get('size')=='512x512' %}selected{% endif %}>512×512</option>
</select>
<label for="aspect_ratio">Aspect ratio</label>
<select id="aspect_ratio" name="aspect_ratio">
<option value="">Auto (default)</option>
<option value="1:1" {% if request.form.get('aspect_ratio')=='1:1' %}selected{% endif %}>1:1 (square)</option>
<option value="16:9" {% if request.form.get('aspect_ratio')=='16:9' %}selected{% endif %}>16:9 (landscape)</option>
<option value="9:16" {% if request.form.get('aspect_ratio')=='9:16' %}selected{% endif %}>9:16 (portrait)</option>
<option value="4:3" {% if request.form.get('aspect_ratio')=='4:3' %}selected{% endif %}>4:3</option>
<option value="3:4" {% if request.form.get('aspect_ratio')=='3:4' %}selected{% endif %}>3:4</option>
<option value="3:2" {% if request.form.get('aspect_ratio')=='3:2' %}selected{% endif %}>3:2</option>
<option value="2:3" {% if request.form.get('aspect_ratio')=='2:3' %}selected{% endif %}>2:3</option>
</select>
<label for="image_size">Resolution</label>
<select id="image_size" name="image_size">
<option value="">Auto (default)</option>
<option value="0.5K" {% if request.form.get('image_size')=='0.5K' %}selected{% endif %}>0.5K (low)</option>
<option value="1K" {% if request.form.get('image_size')=='1K' %}selected{% endif %}>1K (standard)</option>
<option value="2K" {% if request.form.get('image_size')=='2K' %}selected{% endif %}>2K (high)</option>
<option value="4K" {% if request.form.get('image_size')=='4K' %}selected{% endif %}>4K (ultra)</option>
</select>
<label for="n">Number of images</label>
<select id="n" name="n">
<option value="1" {% if request.form.get('n','1')=='1' %}selected{% endif %}>1</option>