innercontext/backend/innercontext/llm.py
Piotr Oleszczyk 092fd87606 fix(llm): log and handle non-STOP finish_reason from Gemini
When Gemini stops generation early (e.g. due to safety filters or
thinking-model quirks), finish_reason != STOP but no exception is raised,
causing the caller to receive truncated JSON and a confusing 502 "invalid
JSON" error. Now:
- finish_reason is extracted from candidates[0] and stored in ai_call_logs
- any non-STOP finish_reason raises HTTP 502 with a clear message
- Alembic migration adds the finish_reason column to ai_call_logs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-01 20:08:22 +01:00

102 lines
3.4 KiB
Python

"""Shared helpers for Gemini API access."""
import os
import time
from contextlib import suppress
from fastapi import HTTPException
from google import genai
from google.genai import types as genai_types
# Fallback model name returned by get_gemini_client() when the GEMINI_MODEL
# environment variable is not set.
_DEFAULT_MODEL = "gemini-flash-latest"
def get_gemini_client() -> tuple[genai.Client, str]:
    """Return an authenticated Gemini client and the configured model name.

    The API key is read from the ``GEMINI_API_KEY`` environment variable and
    the model name from ``GEMINI_MODEL``. An unset *or empty* ``GEMINI_MODEL``
    falls back to ``_DEFAULT_MODEL``, mirroring how an empty API key is
    treated as missing.

    Returns:
        A ``(client, model_name)`` tuple.

    Raises:
        HTTPException: 503 if ``GEMINI_API_KEY`` is unset or empty.
    """
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise HTTPException(status_code=503, detail="GEMINI_API_KEY not configured")

    # `or` instead of a .get() default: a variable that is present but empty
    # (e.g. `GEMINI_MODEL=` in an env file) would otherwise be sent to the API
    # as the model name "".
    model = os.environ.get("GEMINI_MODEL") or _DEFAULT_MODEL
    return genai.Client(api_key=api_key), model
def _finish_reason_name(response) -> str | None:
    """Return the first candidate's finish reason as a string, or None if unavailable."""
    try:
        reason = response.candidates[0].finish_reason
    except (AttributeError, IndexError, TypeError):
        # No candidates attribute, an empty candidate list, or candidates is None.
        return None
    if reason is None:
        return None
    # Prefer the enum member name; fall back to str() so a value without a
    # usable ``.name`` is still recorded and checked instead of being dropped.
    return getattr(reason, "name", None) or str(reason)


def _response_text(response) -> str | None:
    """Return ``response.text`` for logging, or None if it cannot be read."""
    if not response:
        return None
    try:
        return response.text
    except Exception:
        # Best-effort: presumably the accessor can raise for some response
        # shapes (SDK-version dependent — TODO confirm). A missing text must
        # not cost us the whole log row.
        return None


def call_gemini(
    *,
    endpoint: str,
    contents,
    config: genai_types.GenerateContentConfig,
    user_input: str | None = None,
):
    """Call Gemini, log full request + response to DB, return response unchanged.

    Args:
        endpoint: Label stored in the ``endpoint`` column of the log row.
        contents: Passed straight through to ``generate_content``.
        config: Generation config; its ``system_instruction`` (if any) is
            stringified and stored as the logged system prompt.
        user_input: Text stored as the logged user input. When None,
            ``str(contents)`` is used (best-effort).

    Returns:
        The response object from ``client.models.generate_content``.

    Raises:
        HTTPException: 503 (from ``get_gemini_client``) if the API key is not
            configured; 502 if the API call raises, or if the first
            candidate's finish reason is present and not ``STOP``.

    An ``AICallLog`` row is written on every outcome. Logging is best-effort:
    a failure to persist it is reported via ``logging`` and never replaces the
    call's real result or exception.
    """
    import logging

    from sqlmodel import Session

    from db import engine  # deferred to avoid circular import at module load
    from innercontext.models.ai_log import AICallLog

    client, model = get_gemini_client()

    sys_prompt = None
    if config.system_instruction:
        raw = config.system_instruction
        sys_prompt = raw if isinstance(raw, str) else str(raw)

    if user_input is None:
        # Only used for the audit log, so a failing __str__ must not abort the call.
        with suppress(Exception):
            user_input = str(contents)

    start = time.monotonic()
    success, error_detail, response, finish_reason = True, None, None, None
    try:
        response = client.models.generate_content(
            model=model, contents=contents, config=config
        )
        finish_reason = _finish_reason_name(response)
        # NOTE(review): a response with no candidates yields finish_reason=None
        # and is therefore treated as success here — confirm whether that case
        # (presumably e.g. a blocked prompt) should also become a 502.
        if finish_reason and finish_reason != "STOP":
            success = False
            error_detail = f"finish_reason: {finish_reason}"
            raise HTTPException(
                status_code=502,
                detail=f"Gemini stopped early (finish_reason={finish_reason})",
            )
    except HTTPException:
        # Already the error we want to surface; the log fields are set above.
        raise
    except Exception as exc:
        success = False
        error_detail = str(exc)
        raise HTTPException(status_code=502, detail=f"Gemini API error: {exc}") from exc
    finally:
        duration_ms = int((time.monotonic() - start) * 1000)
        try:
            usage = getattr(response, "usage_metadata", None) if response else None
            log = AICallLog(
                endpoint=endpoint,
                model=model,
                system_prompt=sys_prompt,
                user_input=user_input,
                response_text=_response_text(response),
                prompt_tokens=getattr(usage, "prompt_token_count", None) if usage else None,
                completion_tokens=(
                    getattr(usage, "candidates_token_count", None) if usage else None
                ),
                total_tokens=getattr(usage, "total_token_count", None) if usage else None,
                duration_ms=duration_ms,
                finish_reason=finish_reason,
                success=success,
                error_detail=error_detail,
            )
            with Session(engine) as s:
                s.add(log)
                s.commit()
        except Exception:
            # Never let a logging failure mask the outcome of the API call,
            # but leave a trace instead of swallowing it silently.
            logging.getLogger(__name__).warning(
                "Failed to persist AI call log for endpoint %r",
                endpoint,
                exc_info=True,
            )
    return response