# innercontext/backend/innercontext/llm.py

"""Shared helpers for Gemini API access."""

import os
import time
from contextlib import suppress

from fastapi import HTTPException
from google import genai
from google.genai import types as genai_types

# Model name used by get_gemini_client() when GEMINI_MODEL is not set.
_DEFAULT_MODEL = "gemini-flash-latest"


def get_gemini_client() -> tuple[genai.Client, str]:
    """Return an authenticated Gemini client and the configured model name.

    The API key is read from ``GEMINI_API_KEY``. The model name is read from
    ``GEMINI_MODEL``; when that variable is unset *or empty* the module
    default (``_DEFAULT_MODEL``) is used instead.

    Returns:
        A ``(client, model_name)`` tuple.

    Raises:
        HTTPException: 503 if ``GEMINI_API_KEY`` is unset or empty.
    """
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise HTTPException(status_code=503, detail="GEMINI_API_KEY not configured")
    # `or` rather than a .get() default: an empty GEMINI_MODEL= entry should be
    # treated as "not configured", the same way an empty API key is above.
    model = os.environ.get("GEMINI_MODEL") or _DEFAULT_MODEL
    return genai.Client(api_key=api_key), model


def call_gemini(
    *,
    endpoint: str,
    contents,
    config: genai_types.GenerateContentConfig,
    user_input: str | None = None,
):
    """Call Gemini, log full request + response to DB, return response unchanged.

    Args:
        endpoint: Value stored in the log row's ``endpoint`` field.
        contents: Passed through as ``contents`` to ``generate_content``.
        config: Generation config passed through to ``generate_content``; its
            ``system_instruction`` (if truthy) is stored on the log row,
            stringified when it is not already a ``str``.
        user_input: Text stored as the logged user input. When ``None``,
            ``str(contents)`` is used as a best-effort fallback.

    Returns:
        The object returned by ``client.models.generate_content``.

    Raises:
        HTTPException: 503 (from ``get_gemini_client``) when no API key is
            configured; 502 when the API call raises, or when the first
            candidate reports a finish reason other than ``STOP``.

    An ``AICallLog`` row is written on both the success and the failure path.
    Writing it is best-effort: a failure there never replaces the result or
    the exception of the Gemini call, but it is reported through ``logging``.
    """
    import logging

    from sqlmodel import Session

    from db import engine  # deferred to avoid circular import at module load
    from innercontext.models.ai_log import AICallLog

    client, model = get_gemini_client()

    sys_prompt = None
    if config.system_instruction:
        raw = config.system_instruction
        sys_prompt = raw if isinstance(raw, str) else str(raw)
    if user_input is None:
        with suppress(Exception):
            user_input = str(contents)

    start = time.monotonic()
    success, error_detail, response, finish_reason = True, None, None, None
    try:
        response = client.models.generate_content(
            model=model, contents=contents, config=config
        )
        # If the finish reason cannot be read (no candidates, missing field),
        # finish_reason stays None and the response is treated as successful.
        with suppress(Exception):
            finish_reason = response.candidates[0].finish_reason.name
        if finish_reason and finish_reason != "STOP":
            success = False
            error_detail = f"finish_reason: {finish_reason}"
            raise HTTPException(
                status_code=502,
                detail=f"Gemini stopped early (finish_reason={finish_reason})",
            )
    except HTTPException:
        # Our own early-stop error: already fully described, re-raise as is.
        raise
    except Exception as exc:
        success = False
        error_detail = str(exc)
        raise HTTPException(status_code=502, detail=f"Gemini API error: {exc}") from exc
    finally:
        # Runs for success and failure alike, so every call leaves a log row.
        duration_ms = int((time.monotonic() - start) * 1000)
        try:
            response_text, usage = None, None
            if response:
                # Read each field on its own: an unreadable text or usage
                # block should blank that column, not lose the whole row.
                with suppress(Exception):
                    response_text = response.text
                with suppress(Exception):
                    usage = response.usage_metadata
            log = AICallLog(
                endpoint=endpoint,
                model=model,
                system_prompt=sys_prompt,
                user_input=user_input,
                response_text=response_text,
                prompt_tokens=usage.prompt_token_count if usage else None,
                completion_tokens=usage.candidates_token_count if usage else None,
                total_tokens=usage.total_token_count if usage else None,
                duration_ms=duration_ms,
                finish_reason=finish_reason,
                success=success,
                error_detail=error_detail,
            )
            with Session(engine) as s:
                s.add(log)
                s.commit()
        except Exception:
            # Still best-effort (must not mask the Gemini result or error),
            # but leave a trace instead of discarding the failure silently.
            logging.getLogger(__name__).warning(
                "Failed to persist AI call log for endpoint %r",
                endpoint,
                exc_info=True,
            )
    return response