From 092fd87606513a377803800499554fc0ccd98315 Mon Sep 17 00:00:00 2001
From: Piotr Oleszczyk
Date: Sun, 1 Mar 2026 20:08:22 +0100
Subject: [PATCH] fix(llm): log and handle non-STOP finish_reason from Gemini

When Gemini stops generation early (e.g. due to safety filters or
thinking-model quirks), finish_reason != STOP but no exception is
raised, causing the caller to receive truncated JSON and a confusing
502 "invalid JSON" error.

Now:
- finish_reason is extracted from candidates[0] and stored in ai_call_logs
- any non-STOP finish_reason raises HTTP 502 with a clear message
- Alembic migration adds the finish_reason column to ai_call_logs

Co-Authored-By: Claude Sonnet 4.6
---
 ...5f6a1_add_finish_reason_to_ai_call_logs.py | 29 +++++++++++++++++++
 backend/innercontext/llm.py                   | 14 ++++++++-
 backend/innercontext/models/ai_log.py         |  1 +
 3 files changed, 43 insertions(+), 1 deletion(-)
 create mode 100644 backend/alembic/versions/b2c3d4e5f6a1_add_finish_reason_to_ai_call_logs.py

diff --git a/backend/alembic/versions/b2c3d4e5f6a1_add_finish_reason_to_ai_call_logs.py b/backend/alembic/versions/b2c3d4e5f6a1_add_finish_reason_to_ai_call_logs.py
new file mode 100644
index 0000000..d6c0de6
--- /dev/null
+++ b/backend/alembic/versions/b2c3d4e5f6a1_add_finish_reason_to_ai_call_logs.py
@@ -0,0 +1,29 @@
+"""add_finish_reason_to_ai_call_logs
+
+Revision ID: b2c3d4e5f6a1
+Revises: a1b2c3d4e5f6
+Create Date: 2026-03-01 00:00:00.000000
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+
+from alembic import op
+
+revision: str = "b2c3d4e5f6a1"
+down_revision: Union[str, None] = "a1b2c3d4e5f6"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "ai_call_logs",
+        sa.Column("finish_reason", sa.Text(), nullable=True),
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("ai_call_logs", "finish_reason")
diff --git a/backend/innercontext/llm.py b/backend/innercontext/llm.py
index 90fe12d..5bc3b9d 100644
--- a/backend/innercontext/llm.py
+++ b/backend/innercontext/llm.py
@@ -47,11 +47,22 @@ def call_gemini(
     user_input = str(contents)
 
     start = time.monotonic()
-    success, error_detail, response = True, None, None
+    success, error_detail, response, finish_reason = True, None, None, None
     try:
         response = client.models.generate_content(
             model=model, contents=contents, config=config
         )
+        with suppress(Exception):
+            finish_reason = response.candidates[0].finish_reason.name
+        if finish_reason and finish_reason != "STOP":
+            success = False
+            error_detail = f"finish_reason: {finish_reason}"
+            raise HTTPException(
+                status_code=502,
+                detail=f"Gemini stopped early (finish_reason={finish_reason})",
+            )
+    except HTTPException:
+        raise
     except Exception as exc:
         success = False
         error_detail = str(exc)
@@ -81,6 +92,7 @@ def call_gemini(
             else None
         ),
         duration_ms=duration_ms,
+        finish_reason=finish_reason,
         success=success,
         error_detail=error_detail,
     )
diff --git a/backend/innercontext/models/ai_log.py b/backend/innercontext/models/ai_log.py
index 769f71d..44bb749 100644
--- a/backend/innercontext/models/ai_log.py
+++ b/backend/innercontext/models/ai_log.py
@@ -23,5 +23,6 @@ class AICallLog(SQLModel, table=True):
     completion_tokens: int | None = Field(default=None)
     total_tokens: int | None = Field(default=None)
     duration_ms: int | None = Field(default=None)
+    finish_reason: str | None = Field(default=None)
     success: bool = Field(default=True, index=True)
     error_detail: str | None = Field(default=None)
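
Note (illustrative, not part of the patch): a minimal, self-contained pytest
sketch of the finish_reason guard. _guard_finish_reason is a hypothetical
stand-in that mirrors the logic added to call_gemini, since the function's full
signature is not visible in this diff; the fakes only imitate the shape of the
google-genai response (candidates[0].finish_reason.name).

from contextlib import suppress
from types import SimpleNamespace

import pytest
from fastapi import HTTPException


def _guard_finish_reason(response) -> str | None:
    # Hypothetical helper mirroring the guard added in call_gemini.
    finish_reason = None
    with suppress(Exception):
        finish_reason = response.candidates[0].finish_reason.name
    if finish_reason and finish_reason != "STOP":
        raise HTTPException(
            status_code=502,
            detail=f"Gemini stopped early (finish_reason={finish_reason})",
        )
    return finish_reason


def test_non_stop_finish_reason_maps_to_502():
    # Fake response shaped like google-genai's GenerateContentResponse.
    fake = SimpleNamespace(
        candidates=[SimpleNamespace(finish_reason=SimpleNamespace(name="MAX_TOKENS"))]
    )
    with pytest.raises(HTTPException) as exc_info:
        _guard_finish_reason(fake)
    assert exc_info.value.status_code == 502
    assert "MAX_TOKENS" in exc_info.value.detail


def test_stop_finish_reason_passes_through():
    fake = SimpleNamespace(
        candidates=[SimpleNamespace(finish_reason=SimpleNamespace(name="STOP"))]
    )
    assert _guard_finish_reason(fake) == "STOP"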