fix(llm): log and handle non-STOP finish_reason from Gemini
When Gemini stops generation early (e.g. due to safety filters or thinking-model quirks), finish_reason != STOP but no exception is raised, causing the caller to receive truncated JSON and a confusing 502 "invalid JSON" error.

Now:
- finish_reason is extracted from candidates[0] and stored in ai_call_logs
- any non-STOP finish_reason raises HTTP 502 with a clear message
- an Alembic migration adds the finish_reason column to ai_call_logs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
18683925a1
commit
092fd87606
3 changed files with 43 additions and 1 deletions
|
|
@ -0,0 +1,29 @@
|
||||||
|
"""add_finish_reason_to_ai_call_logs
|
||||||
|
|
||||||
|
Revision ID: b2c3d4e5f6a1
|
||||||
|
Revises: a1b2c3d4e5f6
|
||||||
|
Create Date: 2026-03-01 00:00:00.000000
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
# Alembic revision identifiers used by the migration graph.
revision: str = "b2c3d4e5f6a1"  # this migration
down_revision: Union[str, None] = "a1b2c3d4e5f6"  # parent migration
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Add the ``finish_reason`` text column to ``ai_call_logs``.

    Per the commit description, the column stores the raw finish_reason
    name reported by Gemini for each logged AI call.
    """
    # Nullable so the migration applies cleanly to a populated table;
    # existing rows simply keep NULL.
    finish_reason_col = sa.Column("finish_reason", sa.Text(), nullable=True)
    op.add_column("ai_call_logs", finish_reason_col)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Revert :func:`upgrade` by dropping the ``finish_reason`` column."""
    # Dropping the column discards any stored finish_reason values.
    op.drop_column("ai_call_logs", "finish_reason")
|
||||||
|
|
@ -47,11 +47,22 @@ def call_gemini(
|
||||||
user_input = str(contents)
|
user_input = str(contents)
|
||||||
|
|
||||||
start = time.monotonic()
|
start = time.monotonic()
|
||||||
success, error_detail, response = True, None, None
|
success, error_detail, response, finish_reason = True, None, None, None
|
||||||
try:
|
try:
|
||||||
response = client.models.generate_content(
|
response = client.models.generate_content(
|
||||||
model=model, contents=contents, config=config
|
model=model, contents=contents, config=config
|
||||||
)
|
)
|
||||||
|
with suppress(Exception):
|
||||||
|
finish_reason = response.candidates[0].finish_reason.name
|
||||||
|
if finish_reason and finish_reason != "STOP":
|
||||||
|
success = False
|
||||||
|
error_detail = f"finish_reason: {finish_reason}"
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=502,
|
||||||
|
detail=f"Gemini stopped early (finish_reason={finish_reason})",
|
||||||
|
)
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
success = False
|
success = False
|
||||||
error_detail = str(exc)
|
error_detail = str(exc)
|
||||||
|
|
@ -81,6 +92,7 @@ def call_gemini(
|
||||||
else None
|
else None
|
||||||
),
|
),
|
||||||
duration_ms=duration_ms,
|
duration_ms=duration_ms,
|
||||||
|
finish_reason=finish_reason,
|
||||||
success=success,
|
success=success,
|
||||||
error_detail=error_detail,
|
error_detail=error_detail,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -23,5 +23,6 @@ class AICallLog(SQLModel, table=True):
|
||||||
completion_tokens: int | None = Field(default=None)
|
completion_tokens: int | None = Field(default=None)
|
||||||
total_tokens: int | None = Field(default=None)
|
total_tokens: int | None = Field(default=None)
|
||||||
duration_ms: int | None = Field(default=None)
|
duration_ms: int | None = Field(default=None)
|
||||||
|
finish_reason: str | None = Field(default=None)
|
||||||
success: bool = Field(default=True, index=True)
|
success: bool = Field(default=True, index=True)
|
||||||
error_detail: str | None = Field(default=None)
|
error_detail: str | None = Field(default=None)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue