fix(llm): log and handle non-STOP finish_reason from Gemini
When Gemini stops generation early (e.g. due to safety filters or thinking-model quirks), finish_reason != STOP but no exception is raised, causing the caller to receive truncated JSON and a confusing 502 "invalid JSON" error.

Now:
- finish_reason is extracted from candidates[0] and stored in ai_call_logs
- any non-STOP finish_reason raises HTTP 502 with a clear message
- an Alembic migration adds the finish_reason column to ai_call_logs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
18683925a1
commit
092fd87606
3 changed files with 43 additions and 1 deletions
|
|
@ -0,0 +1,29 @@
|
||||||
|
"""add_finish_reason_to_ai_call_logs
|
||||||
|
|
||||||
|
Revision ID: b2c3d4e5f6a1
|
||||||
|
Revises: a1b2c3d4e5f6
|
||||||
|
Create Date: 2026-03-01 00:00:00.000000
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
# Alembic revision identifiers used by the migration graph.
revision: str = "b2c3d4e5f6a1"  # this migration
down_revision: Union[str, None] = "a1b2c3d4e5f6"  # parent migration
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Add the ``finish_reason`` text column to ``ai_call_logs``.

    Per the commit description, the column stores the raw finish_reason
    name reported by Gemini for each logged AI call.
    """
    # Nullable so the migration applies cleanly to a populated table;
    # existing rows simply keep NULL.
    finish_reason_col = sa.Column("finish_reason", sa.Text(), nullable=True)
    op.add_column("ai_call_logs", finish_reason_col)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Revert :func:`upgrade` by dropping the ``finish_reason`` column."""
    # Dropping the column discards any stored finish_reason values.
    op.drop_column("ai_call_logs", "finish_reason")
|
||||||
|
|
@ -47,11 +47,22 @@ def call_gemini(
|
||||||
user_input = str(contents)
|
user_input = str(contents)
|
||||||
|
|
||||||
start = time.monotonic()
|
start = time.monotonic()
|
||||||
success, error_detail, response = True, None, None
|
success, error_detail, response, finish_reason = True, None, None, None
|
||||||
try:
|
try:
|
||||||
response = client.models.generate_content(
|
response = client.models.generate_content(
|
||||||
model=model, contents=contents, config=config
|
model=model, contents=contents, config=config
|
||||||
)
|
)
|
||||||
|
with suppress(Exception):
|
||||||
|
finish_reason = response.candidates[0].finish_reason.name
|
||||||
|
if finish_reason and finish_reason != "STOP":
|
||||||
|
success = False
|
||||||
|
error_detail = f"finish_reason: {finish_reason}"
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=502,
|
||||||
|
detail=f"Gemini stopped early (finish_reason={finish_reason})",
|
||||||
|
)
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
success = False
|
success = False
|
||||||
error_detail = str(exc)
|
error_detail = str(exc)
|
||||||
|
|
@ -81,6 +92,7 @@ def call_gemini(
|
||||||
else None
|
else None
|
||||||
),
|
),
|
||||||
duration_ms=duration_ms,
|
duration_ms=duration_ms,
|
||||||
|
finish_reason=finish_reason,
|
||||||
success=success,
|
success=success,
|
||||||
error_detail=error_detail,
|
error_detail=error_detail,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -23,5 +23,6 @@ class AICallLog(SQLModel, table=True):
|
||||||
completion_tokens: int | None = Field(default=None)
|
completion_tokens: int | None = Field(default=None)
|
||||||
total_tokens: int | None = Field(default=None)
|
total_tokens: int | None = Field(default=None)
|
||||||
duration_ms: int | None = Field(default=None)
|
duration_ms: int | None = Field(default=None)
|
||||||
|
finish_reason: str | None = Field(default=None)
|
||||||
success: bool = Field(default=True, index=True)
|
success: bool = Field(default=True, index=True)
|
||||||
error_detail: str | None = Field(default=None)
|
error_detail: str | None = Field(default=None)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue