feat(api): add Phase 3 observability - expose validation warnings and metadata to frontend

Backend changes:
- Create ResponseMetadata and TokenMetrics models for API responses
- Modify call_gemini() and call_gemini_with_function_tools() to return (response, log_id) tuple
- Add _build_response_metadata() helper to extract metadata from AICallLog
- Update routines API (/suggest, /suggest-batch) to populate validation_warnings, auto_fixes_applied, and metadata
- Update products API (/suggest) to populate observability fields
- Update skincare API to handle new return signature

Frontend changes:
- Add TypeScript types: TokenMetrics, ResponseMetadata
- Update RoutineSuggestion, BatchSuggestion, ShoppingSuggestionResponse with observability fields

Next: Create UI components to display warnings, reasoning chains, and token metrics
This commit is contained in:
Piotr Oleszczyk 2026-03-06 15:50:28 +01:00
parent 3bf19d8acb
commit 3c3248c2ea
6 changed files with 213 additions and 40 deletions

View file

@ -1,7 +1,7 @@
import json import json
import logging import logging
from datetime import date from datetime import date
from typing import Literal, Optional from typing import Any, Literal, Optional
from uuid import UUID, uuid4 from uuid import UUID, uuid4
from fastapi import APIRouter, Depends, HTTPException, Query from fastapi import APIRouter, Depends, HTTPException, Query
@ -42,6 +42,8 @@ from innercontext.models import (
SkinConcern, SkinConcern,
SkinConditionSnapshot, SkinConditionSnapshot,
) )
from innercontext.models.ai_log import AICallLog
from innercontext.models.api_metadata import ResponseMetadata, TokenMetrics
from innercontext.models.enums import ( from innercontext.models.enums import (
AbsorptionSpeed, AbsorptionSpeed,
DayTime, DayTime,
@ -63,6 +65,37 @@ logger = logging.getLogger(__name__)
router = APIRouter() router = APIRouter()
def _build_response_metadata(session: Session, log_id: Any) -> ResponseMetadata | None:
    """Build ResponseMetadata from AICallLog for Phase 3 observability.

    Args:
        session: Active database session used to load the log row.
        log_id: Primary key of the AICallLog entry (a UUID), or a falsy
            value when the call was not logged.

    Returns:
        A populated ResponseMetadata, or None when no log row is available.
    """
    if not log_id:
        return None
    log = session.get(AICallLog, log_id)
    if log is None:
        return None

    # Token metrics are only attached when the provider reported all three
    # core counts; thoughts_tokens remains optional either way.
    counts = (log.prompt_tokens, log.completion_tokens, log.total_tokens)
    metrics = None
    if all(count is not None for count in counts):
        metrics = TokenMetrics(
            prompt_tokens=counts[0],
            completion_tokens=counts[1],
            thoughts_tokens=log.thoughts_tokens,
            total_tokens=counts[2],
        )

    return ResponseMetadata(
        model_used=log.model,
        duration_ms=log.duration_ms or 0,  # fall back to 0 when unrecorded
        reasoning_chain=log.reasoning_chain,
        token_metrics=metrics,
    )
PricingSource = Literal["category", "fallback", "insufficient_data"] PricingSource = Literal["category", "fallback", "insufficient_data"]
PricingOutput = tuple[PriceTier | None, float | None, PricingSource | None] PricingOutput = tuple[PriceTier | None, float | None, PricingSource | None]
PricingOutputs = dict[UUID, PricingOutput] PricingOutputs = dict[UUID, PricingOutput]
@ -234,6 +267,10 @@ class ProductSuggestion(PydanticBase):
class ShoppingSuggestionResponse(PydanticBase): class ShoppingSuggestionResponse(PydanticBase):
suggestions: list[ProductSuggestion] suggestions: list[ProductSuggestion]
reasoning: str reasoning: str
# Phase 3: Observability fields
validation_warnings: list[str] | None = None
auto_fixes_applied: list[str] | None = None
metadata: "ResponseMetadata | None" = None
class _ProductSuggestionOut(PydanticBase): class _ProductSuggestionOut(PydanticBase):
@ -609,7 +646,7 @@ def parse_product_text(data: ProductParseRequest) -> ProductParseResponse:
# Phase 1: Sanitize input text # Phase 1: Sanitize input text
sanitized_text = sanitize_user_input(data.text, max_length=10000) sanitized_text = sanitize_user_input(data.text, max_length=10000)
response = call_gemini( response, log_id = call_gemini(
endpoint="products/parse-text", endpoint="products/parse-text",
contents=f"Extract product data from this text:\n\n{sanitized_text}", contents=f"Extract product data from this text:\n\n{sanitized_text}",
config=get_extraction_config( config=get_extraction_config(
@ -997,7 +1034,7 @@ def suggest_shopping(session: Session = Depends(get_session)):
} }
try: try:
response = call_gemini_with_function_tools( response, log_id = call_gemini_with_function_tools(
endpoint="products/suggest", endpoint="products/suggest",
contents=prompt, contents=prompt,
config=config, config=config,
@ -1020,7 +1057,7 @@ def suggest_shopping(session: Session = Depends(get_session)):
"- Zasugeruj tylko najbardziej bezpieczne i realistyczne typy produktow do uzupelnienia brakow," "- Zasugeruj tylko najbardziej bezpieczne i realistyczne typy produktow do uzupelnienia brakow,"
" unikaj agresywnych aktywnych przy niepelnych danych.\n" " unikaj agresywnych aktywnych przy niepelnych danych.\n"
) )
response = call_gemini( response, log_id = call_gemini(
endpoint="products/suggest", endpoint="products/suggest",
contents=conservative_prompt, contents=conservative_prompt,
config=get_creative_config( config=get_creative_config(
@ -1044,12 +1081,6 @@ def suggest_shopping(session: Session = Depends(get_session)):
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
raise HTTPException(status_code=502, detail=f"LLM returned invalid JSON: {e}") raise HTTPException(status_code=502, detail=f"LLM returned invalid JSON: {e}")
shopping_response = ShoppingSuggestionResponse(
suggestions=[ProductSuggestion(**s) for s in parsed.get("suggestions", [])],
reasoning=parsed.get("reasoning", ""),
)
# Phase 1: Validate the shopping suggestions
# Get products with inventory (those user already owns) # Get products with inventory (those user already owns)
products_with_inventory = session.exec( products_with_inventory = session.exec(
select(Product).join(ProductInventory).distinct() select(Product).join(ProductInventory).distinct()
@ -1061,7 +1092,15 @@ def suggest_shopping(session: Session = Depends(get_session)):
valid_targets=set(SkinConcern), valid_targets=set(SkinConcern),
) )
# Phase 1: Validate the shopping suggestions
validator = ShoppingValidator() validator = ShoppingValidator()
# Build initial shopping response without metadata
shopping_response = ShoppingSuggestionResponse(
suggestions=[ProductSuggestion(**s) for s in parsed.get("suggestions", [])],
reasoning=parsed.get("reasoning", ""),
)
validation_result = validator.validate(shopping_response, shopping_context) validation_result = validator.validate(shopping_response, shopping_context)
if not validation_result.is_valid: if not validation_result.is_valid:
@ -1073,7 +1112,14 @@ def suggest_shopping(session: Session = Depends(get_session)):
detail=f"Generated shopping suggestions failed validation: {'; '.join(validation_result.errors)}", detail=f"Generated shopping suggestions failed validation: {'; '.join(validation_result.errors)}",
) )
# Phase 3: Add warnings, auto-fixes, and metadata to response
if validation_result.warnings: if validation_result.warnings:
logger.warning(f"Shopping suggestion warnings: {validation_result.warnings}") logger.warning(f"Shopping suggestion warnings: {validation_result.warnings}")
shopping_response.validation_warnings = validation_result.warnings
if validation_result.auto_fixes:
shopping_response.auto_fixes_applied = validation_result.auto_fixes
shopping_response.metadata = _build_response_metadata(session, log_id)
return shopping_response return shopping_response

View file

@ -2,7 +2,7 @@ import json
import logging import logging
import math import math
from datetime import date, timedelta from datetime import date, timedelta
from typing import Optional from typing import Any, Optional
from uuid import UUID, uuid4 from uuid import UUID, uuid4
from fastapi import APIRouter, Depends, HTTPException from fastapi import APIRouter, Depends, HTTPException
@ -40,6 +40,8 @@ from innercontext.models import (
RoutineStep, RoutineStep,
SkinConditionSnapshot, SkinConditionSnapshot,
) )
from innercontext.models.ai_log import AICallLog
from innercontext.models.api_metadata import ResponseMetadata, TokenMetrics
from innercontext.models.enums import GroomingAction, PartOfDay from innercontext.models.enums import GroomingAction, PartOfDay
from innercontext.validators import BatchValidator, RoutineSuggestionValidator from innercontext.validators import BatchValidator, RoutineSuggestionValidator
from innercontext.validators.batch_validator import BatchValidationContext from innercontext.validators.batch_validator import BatchValidationContext
@ -47,6 +49,37 @@ from innercontext.validators.routine_validator import RoutineValidationContext
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _build_response_metadata(session: Session, log_id: Any) -> ResponseMetadata | None:
    """Build ResponseMetadata from AICallLog for Phase 3 observability.

    Looks up the AICallLog row identified by ``log_id`` and converts it into
    the API-facing metadata model. Returns None when ``log_id`` is falsy or
    the row cannot be found.
    """
    log = session.get(AICallLog, log_id) if log_id else None
    if not log:
        return None

    # All three core counts must be present before we report token usage;
    # thoughts_tokens may legitimately be None.
    has_full_token_counts = (
        log.prompt_tokens is not None
        and log.completion_tokens is not None
        and log.total_tokens is not None
    )
    token_metrics = (
        TokenMetrics(
            prompt_tokens=log.prompt_tokens,
            completion_tokens=log.completion_tokens,
            thoughts_tokens=log.thoughts_tokens,
            total_tokens=log.total_tokens,
        )
        if has_full_token_counts
        else None
    )

    return ResponseMetadata(
        model_used=log.model,
        duration_ms=log.duration_ms or 0,  # 0 when the duration was not recorded
        reasoning_chain=log.reasoning_chain,
        token_metrics=token_metrics,
    )
router = APIRouter() router = APIRouter()
@ -124,6 +157,10 @@ class RoutineSuggestion(SQLModel):
steps: list[SuggestedStep] steps: list[SuggestedStep]
reasoning: str reasoning: str
summary: Optional[RoutineSuggestionSummary] = None summary: Optional[RoutineSuggestionSummary] = None
# Phase 3: Observability fields
validation_warnings: Optional[list[str]] = None
auto_fixes_applied: Optional[list[str]] = None
metadata: Optional[ResponseMetadata] = None
class SuggestBatchRequest(SQLModel): class SuggestBatchRequest(SQLModel):
@ -144,6 +181,10 @@ class DayPlan(SQLModel):
class BatchSuggestion(SQLModel): class BatchSuggestion(SQLModel):
days: list[DayPlan] days: list[DayPlan]
overall_reasoning: str overall_reasoning: str
# Phase 3: Observability fields
validation_warnings: Optional[list[str]] = None
auto_fixes_applied: Optional[list[str]] = None
metadata: Optional[ResponseMetadata] = None
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -674,7 +715,7 @@ def suggest_routine(
} }
try: try:
response = call_gemini_with_function_tools( response, log_id = call_gemini_with_function_tools(
endpoint="routines/suggest", endpoint="routines/suggest",
contents=prompt, contents=prompt,
config=config, config=config,
@ -698,7 +739,7 @@ def suggest_routine(
" preferujac lagodne produkty wspierajace bariere i fotoprotekcje.\n" " preferujac lagodne produkty wspierajace bariere i fotoprotekcje.\n"
"- Gdy masz watpliwosci, pomijaj ryzykowne aktywne kroki.\n" "- Gdy masz watpliwosci, pomijaj ryzykowne aktywne kroki.\n"
) )
response = call_gemini( response, log_id = call_gemini(
endpoint="routines/suggest", endpoint="routines/suggest",
contents=conservative_prompt, contents=conservative_prompt,
config=get_creative_config( config=get_creative_config(
@ -760,13 +801,6 @@ def suggest_routine(
confidence=confidence, confidence=confidence,
) )
# Phase 1: Validate the response
suggestion = RoutineSuggestion(
steps=steps,
reasoning=parsed.get("reasoning", ""),
summary=summary,
)
# Get skin snapshot for barrier state # Get skin snapshot for barrier state
stmt = select(SkinConditionSnapshot).order_by( stmt = select(SkinConditionSnapshot).order_by(
col(SkinConditionSnapshot.snapshot_date).desc() col(SkinConditionSnapshot.snapshot_date).desc()
@ -790,8 +824,16 @@ def suggest_routine(
just_shaved=False, # Could be enhanced with grooming context just_shaved=False, # Could be enhanced with grooming context
) )
# Validate # Phase 1: Validate the response
validator = RoutineSuggestionValidator() validator = RoutineSuggestionValidator()
# Build initial suggestion without metadata
suggestion = RoutineSuggestion(
steps=steps,
reasoning=parsed.get("reasoning", ""),
summary=summary,
)
validation_result = validator.validate(suggestion, validation_context) validation_result = validator.validate(suggestion, validation_context)
if not validation_result.is_valid: if not validation_result.is_valid:
@ -805,10 +847,15 @@ def suggest_routine(
detail=f"Generated routine failed safety validation: {'; '.join(validation_result.errors)}", detail=f"Generated routine failed safety validation: {'; '.join(validation_result.errors)}",
) )
# Add warnings to response if any # Phase 3: Add warnings, auto-fixes, and metadata to response
if validation_result.warnings: if validation_result.warnings:
logger.warning(f"Routine suggestion warnings: {validation_result.warnings}") logger.warning(f"Routine suggestion warnings: {validation_result.warnings}")
# Note: We'll add warnings field to RoutineSuggestion model in a moment suggestion.validation_warnings = validation_result.warnings
if validation_result.auto_fixes:
suggestion.auto_fixes_applied = validation_result.auto_fixes
suggestion.metadata = _build_response_metadata(session, log_id)
return suggestion return suggestion
@ -878,7 +925,7 @@ def suggest_batch(
"\nZwróć JSON zgodny ze schematem." "\nZwróć JSON zgodny ze schematem."
) )
response = call_gemini( response, log_id = call_gemini(
endpoint="routines/suggest-batch", endpoint="routines/suggest-batch",
contents=prompt, contents=prompt,
config=get_creative_config( config=get_creative_config(
@ -936,11 +983,6 @@ def suggest_batch(
) )
) )
batch_suggestion = BatchSuggestion(
days=days, overall_reasoning=parsed.get("overall_reasoning", "")
)
# Phase 1: Validate the batch response
# Get skin snapshot for barrier state # Get skin snapshot for barrier state
stmt = select(SkinConditionSnapshot).order_by( stmt = select(SkinConditionSnapshot).order_by(
col(SkinConditionSnapshot.snapshot_date).desc() col(SkinConditionSnapshot.snapshot_date).desc()
@ -964,8 +1006,14 @@ def suggest_batch(
last_used_dates=last_used_dates_by_uuid, last_used_dates=last_used_dates_by_uuid,
) )
# Validate # Phase 1: Validate the batch response
batch_validator = BatchValidator() batch_validator = BatchValidator()
# Build initial batch suggestion without metadata
batch_suggestion = BatchSuggestion(
days=days, overall_reasoning=parsed.get("overall_reasoning", "")
)
validation_result = batch_validator.validate(batch_suggestion, batch_context) validation_result = batch_validator.validate(batch_suggestion, batch_context)
if not validation_result.is_valid: if not validation_result.is_valid:
@ -977,9 +1025,15 @@ def suggest_batch(
detail=f"Generated batch plan failed safety validation: {'; '.join(validation_result.errors)}", detail=f"Generated batch plan failed safety validation: {'; '.join(validation_result.errors)}",
) )
# Log warnings if any # Phase 3: Add warnings, auto-fixes, and metadata to response
if validation_result.warnings: if validation_result.warnings:
logger.warning(f"Batch routine warnings: {validation_result.warnings}") logger.warning(f"Batch routine warnings: {validation_result.warnings}")
batch_suggestion.validation_warnings = validation_result.warnings
if validation_result.auto_fixes:
batch_suggestion.auto_fixes_applied = validation_result.auto_fixes
batch_suggestion.metadata = _build_response_metadata(session, log_id)
return batch_suggestion return batch_suggestion

View file

@ -179,7 +179,7 @@ async def analyze_skin_photos(
) )
image_summary = f"{len(photos)} image(s): {', '.join((p.content_type or 'unknown') for p in photos)}" image_summary = f"{len(photos)} image(s): {', '.join((p.content_type or 'unknown') for p in photos)}"
response = call_gemini( response, log_id = call_gemini(
endpoint="skincare/analyze-photos", endpoint="skincare/analyze-photos",
contents=parts, contents=parts,
config=get_extraction_config( config=get_extraction_config(

View file

@ -109,8 +109,12 @@ def call_gemini(
config: genai_types.GenerateContentConfig, config: genai_types.GenerateContentConfig,
user_input: str | None = None, user_input: str | None = None,
tool_trace: dict[str, Any] | None = None, tool_trace: dict[str, Any] | None = None,
): ) -> tuple[Any, Any]:
"""Call Gemini, log full request + response to DB, return response unchanged.""" """Call Gemini, log full request + response to DB.
Returns:
Tuple of (response, log_id) where log_id is the AICallLog.id (UUID) or None if logging failed.
"""
from sqlmodel import Session from sqlmodel import Session
from db import engine # deferred to avoid circular import at module load from db import engine # deferred to avoid circular import at module load
@ -127,7 +131,13 @@ def call_gemini(
user_input = str(contents) user_input = str(contents)
start = time.monotonic() start = time.monotonic()
success, error_detail, response, finish_reason = True, None, None, None success, error_detail, response, finish_reason, log_id = (
True,
None,
None,
None,
None,
)
try: try:
response = client.models.generate_content( response = client.models.generate_content(
model=model, contents=contents, config=config model=model, contents=contents, config=config
@ -199,7 +209,9 @@ def call_gemini(
with Session(engine) as s: with Session(engine) as s:
s.add(log) s.add(log)
s.commit() s.commit()
return response s.refresh(log)
log_id = log.id
return response, log_id
def call_gemini_with_function_tools( def call_gemini_with_function_tools(
@ -210,17 +222,22 @@ def call_gemini_with_function_tools(
function_handlers: dict[str, Callable[[dict[str, Any]], dict[str, Any]]], function_handlers: dict[str, Callable[[dict[str, Any]], dict[str, Any]]],
user_input: str | None = None, user_input: str | None = None,
max_tool_roundtrips: int = 2, max_tool_roundtrips: int = 2,
): ) -> tuple[Any, Any]:
"""Call Gemini with function-calling loop until final response text is produced.""" """Call Gemini with function-calling loop until final response text is produced.
Returns:
Tuple of (response, log_id) where log_id is the AICallLog.id (UUID) of the final call.
"""
if max_tool_roundtrips < 0: if max_tool_roundtrips < 0:
raise ValueError("max_tool_roundtrips must be >= 0") raise ValueError("max_tool_roundtrips must be >= 0")
history = list(contents) if isinstance(contents, list) else [contents] history = list(contents) if isinstance(contents, list) else [contents]
rounds = 0 rounds = 0
trace_events: list[dict[str, Any]] = [] trace_events: list[dict[str, Any]] = []
log_id = None
while True: while True:
response = call_gemini( response, log_id = call_gemini(
endpoint=endpoint, endpoint=endpoint,
contents=history, contents=history,
config=config, config=config,
@ -233,7 +250,7 @@ def call_gemini_with_function_tools(
) )
function_calls = list(getattr(response, "function_calls", None) or []) function_calls = list(getattr(response, "function_calls", None) or [])
if not function_calls: if not function_calls:
return response return response, log_id
if rounds >= max_tool_roundtrips: if rounds >= max_tool_roundtrips:
raise HTTPException( raise HTTPException(

View file

@ -0,0 +1,29 @@
"""Models for API response metadata (Phase 3: UI/UX Observability)."""
from pydantic import BaseModel
class TokenMetrics(BaseModel):
"""Token usage metrics from LLM call."""
prompt_tokens: int
completion_tokens: int
thoughts_tokens: int | None = None
total_tokens: int
class ResponseMetadata(BaseModel):
"""Metadata about the LLM response for observability."""
model_used: str
duration_ms: int
reasoning_chain: str | None = None
token_metrics: TokenMetrics | None = None
class EnrichedResponse(BaseModel):
"""Base class for API responses with validation and metadata."""
validation_warnings: list[str] | None = None
auto_fixes_applied: list[str] | None = None
metadata: ResponseMetadata | None = None

View file

@ -241,10 +241,29 @@ export interface RoutineSuggestionSummary {
confidence: number; confidence: number;
} }
// Phase 3: Observability metadata types
/** Token usage metrics reported by the LLM provider for one call. */
export interface TokenMetrics {
  /** Tokens consumed by the prompt. */
  prompt_tokens: number;
  /** Tokens generated in the completion. */
  completion_tokens: number;
  /** Internal reasoning tokens; omitted when the model does not report them. */
  thoughts_tokens?: number;
  /** Total tokens reported for the call. */
  total_tokens: number;
}
/** Observability metadata attached to AI-generated API responses. */
export interface ResponseMetadata {
  /** Identifier of the model that produced the response. */
  model_used: string;
  /** Wall-clock duration of the LLM call in milliseconds. */
  duration_ms: number;
  /** Optional reasoning trace captured from the model. */
  reasoning_chain?: string;
  /** Token usage; omitted when the provider did not report full counts. */
  token_metrics?: TokenMetrics;
}
export interface RoutineSuggestion { export interface RoutineSuggestion {
steps: SuggestedStep[]; steps: SuggestedStep[];
reasoning: string; reasoning: string;
summary?: RoutineSuggestionSummary; summary?: RoutineSuggestionSummary;
// Phase 3: Observability fields
validation_warnings?: string[];
auto_fixes_applied?: string[];
metadata?: ResponseMetadata;
} }
export interface DayPlan { export interface DayPlan {
@ -257,6 +276,10 @@ export interface DayPlan {
export interface BatchSuggestion { export interface BatchSuggestion {
days: DayPlan[]; days: DayPlan[];
overall_reasoning: string; overall_reasoning: string;
// Phase 3: Observability fields
validation_warnings?: string[];
auto_fixes_applied?: string[];
metadata?: ResponseMetadata;
} }
// ─── Shopping suggestion types ─────────────────────────────────────────────── // ─── Shopping suggestion types ───────────────────────────────────────────────
@ -274,6 +297,10 @@ export interface ProductSuggestion {
export interface ShoppingSuggestionResponse { export interface ShoppingSuggestionResponse {
suggestions: ProductSuggestion[]; suggestions: ProductSuggestion[];
reasoning: string; reasoning: string;
// Phase 3: Observability fields
validation_warnings?: string[];
auto_fixes_applied?: string[];
metadata?: ResponseMetadata;
} }
// ─── Health types ──────────────────────────────────────────────────────────── // ─── Health types ────────────────────────────────────────────────────────────