From 3c3248c2eac9038775c1e54224a5599742d86af8 Mon Sep 17 00:00:00 2001 From: Piotr Oleszczyk Date: Fri, 6 Mar 2026 15:50:28 +0100 Subject: [PATCH] feat(api): add Phase 3 observability - expose validation warnings and metadata to frontend Backend changes: - Create ResponseMetadata and TokenMetrics models for API responses - Modify call_gemini() and call_gemini_with_function_tools() to return (response, log_id) tuple - Add _build_response_metadata() helper to extract metadata from AICallLog - Update routines API (/suggest, /suggest-batch) to populate validation_warnings, auto_fixes_applied, and metadata - Update products API (/suggest) to populate observability fields - Update skincare API to handle new return signature Frontend changes: - Add TypeScript types: TokenMetrics, ResponseMetadata - Update RoutineSuggestion, BatchSuggestion, ShoppingSuggestionResponse with observability fields Next: Create UI components to display warnings, reasoning chains, and token metrics --- backend/innercontext/api/products.py | 66 +++++++++++--- backend/innercontext/api/routines.py | 96 ++++++++++++++++----- backend/innercontext/api/skincare.py | 2 +- backend/innercontext/llm.py | 33 +++++-- backend/innercontext/models/api_metadata.py | 29 +++++++ frontend/src/lib/types.ts | 27 ++++++ 6 files changed, 213 insertions(+), 40 deletions(-) create mode 100644 backend/innercontext/models/api_metadata.py diff --git a/backend/innercontext/api/products.py b/backend/innercontext/api/products.py index d6bc82e..eb4840b 100644 --- a/backend/innercontext/api/products.py +++ b/backend/innercontext/api/products.py @@ -1,7 +1,7 @@ import json import logging from datetime import date -from typing import Literal, Optional +from typing import Any, Literal, Optional from uuid import UUID, uuid4 from fastapi import APIRouter, Depends, HTTPException, Query @@ -42,6 +42,8 @@ from innercontext.models import ( SkinConcern, SkinConditionSnapshot, ) +from innercontext.models.ai_log import AICallLog +from innercontext.models.api_metadata import ResponseMetadata, TokenMetrics from innercontext.models.enums import ( AbsorptionSpeed, DayTime, @@ -63,6 +65,37 @@ logger = logging.getLogger(__name__) router = APIRouter() + +def _build_response_metadata(session: Session, log_id: Any) -> ResponseMetadata | None: + """Build ResponseMetadata from AICallLog for Phase 3 observability.""" + if not log_id: + return None + + log = session.get(AICallLog, log_id) + if not log: + return None + + token_metrics = None + if ( + log.prompt_tokens is not None + and log.completion_tokens is not None + and log.total_tokens is not None + ): + token_metrics = TokenMetrics( + prompt_tokens=log.prompt_tokens, + completion_tokens=log.completion_tokens, + thoughts_tokens=log.thoughts_tokens, + total_tokens=log.total_tokens, + ) + + return ResponseMetadata( + model_used=log.model, + duration_ms=log.duration_ms or 0, + reasoning_chain=log.reasoning_chain, + token_metrics=token_metrics, + ) + + PricingSource = Literal["category", "fallback", "insufficient_data"] PricingOutput = tuple[PriceTier | None, float | None, PricingSource | None] PricingOutputs = dict[UUID, PricingOutput] @@ -234,6 +267,10 @@ class ProductSuggestion(PydanticBase): class ShoppingSuggestionResponse(PydanticBase): suggestions: list[ProductSuggestion] reasoning: str + # Phase 3: Observability fields + validation_warnings: list[str] | None = None + auto_fixes_applied: list[str] | None = None + metadata: "ResponseMetadata | None" = None class _ProductSuggestionOut(PydanticBase): @@ -609,7 +646,7 @@ def parse_product_text(data: ProductParseRequest) -> ProductParseResponse: # Phase 1: Sanitize input text sanitized_text = sanitize_user_input(data.text, max_length=10000) - response = call_gemini( + response, log_id = call_gemini( endpoint="products/parse-text", contents=f"Extract product data from this text:\n\n{sanitized_text}", config=get_extraction_config( @@ -997,7 +1034,7 @@ def suggest_shopping(session: Session = Depends(get_session)): } try: - response = call_gemini_with_function_tools( + response, log_id = call_gemini_with_function_tools( endpoint="products/suggest", contents=prompt, config=config, @@ -1020,7 +1057,7 @@ def suggest_shopping(session: Session = Depends(get_session)): "- Zasugeruj tylko najbardziej bezpieczne i realistyczne typy produktow do uzupelnienia brakow," " unikaj agresywnych aktywnych przy niepelnych danych.\n" ) - response = call_gemini( + response, log_id = call_gemini( endpoint="products/suggest", contents=conservative_prompt, config=get_creative_config( @@ -1044,12 +1081,6 @@ def suggest_shopping(session: Session = Depends(get_session)): except json.JSONDecodeError as e: raise HTTPException(status_code=502, detail=f"LLM returned invalid JSON: {e}") - shopping_response = ShoppingSuggestionResponse( - suggestions=[ProductSuggestion(**s) for s in parsed.get("suggestions", [])], - reasoning=parsed.get("reasoning", ""), - ) - - # Phase 1: Validate the shopping suggestions # Get products with inventory (those user already owns) products_with_inventory = session.exec( select(Product).join(ProductInventory).distinct() @@ -1061,7 +1092,15 @@ def suggest_shopping(session: Session = Depends(get_session)): valid_targets=set(SkinConcern), ) + # Phase 1: Validate the shopping suggestions validator = ShoppingValidator() + + # Build initial shopping response without metadata + shopping_response = ShoppingSuggestionResponse( + suggestions=[ProductSuggestion(**s) for s in parsed.get("suggestions", [])], + reasoning=parsed.get("reasoning", ""), + ) + validation_result = validator.validate(shopping_response, shopping_context) if not validation_result.is_valid: @@ -1073,7 +1112,14 @@ def suggest_shopping(session: Session = Depends(get_session)): detail=f"Generated shopping suggestions failed validation: {'; '.join(validation_result.errors)}", ) + # Phase 3: Add warnings, auto-fixes, and metadata to response if validation_result.warnings: logger.warning(f"Shopping suggestion warnings: {validation_result.warnings}") + shopping_response.validation_warnings = validation_result.warnings + + if validation_result.auto_fixes: + shopping_response.auto_fixes_applied = validation_result.auto_fixes + + shopping_response.metadata = _build_response_metadata(session, log_id) return shopping_response diff --git a/backend/innercontext/api/routines.py b/backend/innercontext/api/routines.py index 7134beb..2de0ae4 100644 --- a/backend/innercontext/api/routines.py +++ b/backend/innercontext/api/routines.py @@ -2,7 +2,7 @@ import json import logging import math from datetime import date, timedelta -from typing import Optional +from typing import Any, Optional from uuid import UUID, uuid4 from fastapi import APIRouter, Depends, HTTPException @@ -40,6 +40,8 @@ from innercontext.models import ( RoutineStep, SkinConditionSnapshot, ) +from innercontext.models.ai_log import AICallLog +from innercontext.models.api_metadata import ResponseMetadata, TokenMetrics from innercontext.models.enums import GroomingAction, PartOfDay from innercontext.validators import BatchValidator, RoutineSuggestionValidator from innercontext.validators.batch_validator import BatchValidationContext @@ -47,6 +49,37 @@ from innercontext.validators.routine_validator import RoutineValidationContext logger = logging.getLogger(__name__) + +def _build_response_metadata(session: Session, log_id: Any) -> ResponseMetadata | None: + """Build ResponseMetadata from AICallLog for Phase 3 observability.""" + if not log_id: + return None + + log = session.get(AICallLog, log_id) + if not log: + return None + + token_metrics = None + if ( + log.prompt_tokens is not None + and log.completion_tokens is not None + and log.total_tokens is not None + ): + token_metrics = TokenMetrics( + prompt_tokens=log.prompt_tokens, + completion_tokens=log.completion_tokens, + thoughts_tokens=log.thoughts_tokens, + total_tokens=log.total_tokens, + ) + + return ResponseMetadata( + model_used=log.model, + duration_ms=log.duration_ms or 0, + reasoning_chain=log.reasoning_chain, + token_metrics=token_metrics, + ) + + router = APIRouter() @@ -124,6 +157,10 @@ class RoutineSuggestion(SQLModel): steps: list[SuggestedStep] reasoning: str summary: Optional[RoutineSuggestionSummary] = None + # Phase 3: Observability fields + validation_warnings: Optional[list[str]] = None + auto_fixes_applied: Optional[list[str]] = None + metadata: Optional[ResponseMetadata] = None class SuggestBatchRequest(SQLModel): @@ -144,6 +181,10 @@ class DayPlan(SQLModel): class BatchSuggestion(SQLModel): days: list[DayPlan] overall_reasoning: str + # Phase 3: Observability fields + validation_warnings: Optional[list[str]] = None + auto_fixes_applied: Optional[list[str]] = None + metadata: Optional[ResponseMetadata] = None # --------------------------------------------------------------------------- @@ -674,7 +715,7 @@ def suggest_routine( } try: - response = call_gemini_with_function_tools( + response, log_id = call_gemini_with_function_tools( endpoint="routines/suggest", contents=prompt, config=config, @@ -698,7 +739,7 @@ def suggest_routine( " preferujac lagodne produkty wspierajace bariere i fotoprotekcje.\n" "- Gdy masz watpliwosci, pomijaj ryzykowne aktywne kroki.\n" ) - response = call_gemini( + response, log_id = call_gemini( endpoint="routines/suggest", contents=conservative_prompt, config=get_creative_config( @@ -760,13 +801,6 @@ def suggest_routine( confidence=confidence, ) - # Phase 1: Validate the response - suggestion = RoutineSuggestion( - steps=steps, - reasoning=parsed.get("reasoning", ""), - summary=summary, - ) - # Get skin snapshot for barrier state stmt = select(SkinConditionSnapshot).order_by( col(SkinConditionSnapshot.snapshot_date).desc() @@ -790,8 +824,16 @@ def suggest_routine( just_shaved=False, # Could be enhanced with grooming context ) - # Validate + # Phase 1: Validate the response validator = RoutineSuggestionValidator() + + # Build initial suggestion without metadata + suggestion = RoutineSuggestion( + steps=steps, + reasoning=parsed.get("reasoning", ""), + summary=summary, + ) + validation_result = validator.validate(suggestion, validation_context) if not validation_result.is_valid: @@ -805,10 +847,15 @@ def suggest_routine( detail=f"Generated routine failed safety validation: {'; '.join(validation_result.errors)}", ) - # Add warnings to response if any + # Phase 3: Add warnings, auto-fixes, and metadata to response if validation_result.warnings: logger.warning(f"Routine suggestion warnings: {validation_result.warnings}") - # Note: We'll add warnings field to RoutineSuggestion model in a moment + suggestion.validation_warnings = validation_result.warnings + + if validation_result.auto_fixes: + suggestion.auto_fixes_applied = validation_result.auto_fixes + + suggestion.metadata = _build_response_metadata(session, log_id) return suggestion @@ -878,7 +925,7 @@ def suggest_batch( "\nZwróć JSON zgodny ze schematem." ) - response = call_gemini( + response, log_id = call_gemini( endpoint="routines/suggest-batch", contents=prompt, config=get_creative_config( @@ -936,11 +983,6 @@ def suggest_batch( ) ) - batch_suggestion = BatchSuggestion( - days=days, overall_reasoning=parsed.get("overall_reasoning", "") - ) - - # Phase 1: Validate the batch response # Get skin snapshot for barrier state stmt = select(SkinConditionSnapshot).order_by( col(SkinConditionSnapshot.snapshot_date).desc() @@ -964,8 +1006,14 @@ def suggest_batch( last_used_dates=last_used_dates_by_uuid, ) - # Validate + # Phase 1: Validate the batch response batch_validator = BatchValidator() + + # Build initial batch suggestion without metadata + batch_suggestion = BatchSuggestion( + days=days, overall_reasoning=parsed.get("overall_reasoning", "") + ) + validation_result = batch_validator.validate(batch_suggestion, batch_context) if not validation_result.is_valid: @@ -977,9 +1025,15 @@ def suggest_batch( detail=f"Generated batch plan failed safety validation: {'; '.join(validation_result.errors)}", ) - # Log warnings if any + # Phase 3: Add warnings, auto-fixes, and metadata to response if validation_result.warnings: logger.warning(f"Batch routine warnings: {validation_result.warnings}") + batch_suggestion.validation_warnings = validation_result.warnings + + if validation_result.auto_fixes: + batch_suggestion.auto_fixes_applied = validation_result.auto_fixes + + batch_suggestion.metadata = _build_response_metadata(session, log_id) return batch_suggestion diff --git a/backend/innercontext/api/skincare.py b/backend/innercontext/api/skincare.py index bfa0b6b..730db1e 100644 --- a/backend/innercontext/api/skincare.py +++ b/backend/innercontext/api/skincare.py @@ -179,7 +179,7 @@ async def analyze_skin_photos( ) image_summary = f"{len(photos)} image(s): {', '.join((p.content_type or 'unknown') for p in photos)}" - response = call_gemini( + response, log_id = call_gemini( endpoint="skincare/analyze-photos", contents=parts, config=get_extraction_config( diff --git a/backend/innercontext/llm.py b/backend/innercontext/llm.py index 89c19b0..2c08fbb 100644 --- a/backend/innercontext/llm.py +++ b/backend/innercontext/llm.py @@ -109,8 +109,12 @@ def call_gemini( config: genai_types.GenerateContentConfig, user_input: str | None = None, tool_trace: dict[str, Any] | None = None, -): - """Call Gemini, log full request + response to DB, return response unchanged.""" +) -> tuple[Any, Any]: + """Call Gemini, log full request + response to DB. + + Returns: + Tuple of (response, log_id) where log_id is the AICallLog.id (UUID) or None if logging failed. + """ from sqlmodel import Session from db import engine # deferred to avoid circular import at module load @@ -127,7 +131,13 @@ def call_gemini( user_input = str(contents) start = time.monotonic() - success, error_detail, response, finish_reason = True, None, None, None + success, error_detail, response, finish_reason, log_id = ( + True, + None, + None, + None, + None, + ) try: response = client.models.generate_content( model=model, contents=contents, config=config @@ -199,7 +209,9 @@ def call_gemini( with Session(engine) as s: s.add(log) s.commit() - return response + s.refresh(log) + log_id = log.id + return response, log_id def call_gemini_with_function_tools( @@ -210,17 +222,22 @@ def call_gemini_with_function_tools( function_handlers: dict[str, Callable[[dict[str, Any]], dict[str, Any]]], user_input: str | None = None, max_tool_roundtrips: int = 2, -): - """Call Gemini with function-calling loop until final response text is produced.""" +) -> tuple[Any, Any]: + """Call Gemini with function-calling loop until final response text is produced. + + Returns: + Tuple of (response, log_id) where log_id is the AICallLog.id (UUID) of the final call. + """ if max_tool_roundtrips < 0: raise ValueError("max_tool_roundtrips must be >= 0") history = list(contents) if isinstance(contents, list) else [contents] rounds = 0 trace_events: list[dict[str, Any]] = [] + log_id = None while True: - response = call_gemini( + response, log_id = call_gemini( endpoint=endpoint, contents=history, config=config, @@ -233,7 +250,7 @@ def call_gemini_with_function_tools( ) function_calls = list(getattr(response, "function_calls", None) or []) if not function_calls: - return response + return response, log_id if rounds >= max_tool_roundtrips: raise HTTPException( diff --git a/backend/innercontext/models/api_metadata.py b/backend/innercontext/models/api_metadata.py new file mode 100644 index 0000000..639d6cb --- /dev/null +++ b/backend/innercontext/models/api_metadata.py @@ -0,0 +1,29 @@ +"""Models for API response metadata (Phase 3: UI/UX Observability).""" + +from pydantic import BaseModel + + +class TokenMetrics(BaseModel): + """Token usage metrics from LLM call.""" + + prompt_tokens: int + completion_tokens: int + thoughts_tokens: int | None = None + total_tokens: int + + +class ResponseMetadata(BaseModel): + """Metadata about the LLM response for observability.""" + + model_used: str + duration_ms: int + reasoning_chain: str | None = None + token_metrics: TokenMetrics | None = None + + +class EnrichedResponse(BaseModel): + """Base class for API responses with validation and metadata.""" + + validation_warnings: list[str] | None = None + auto_fixes_applied: list[str] | None = None + metadata: ResponseMetadata | None = None diff --git a/frontend/src/lib/types.ts b/frontend/src/lib/types.ts index 77758d5..a96230f 100644 --- a/frontend/src/lib/types.ts +++ b/frontend/src/lib/types.ts @@ -241,10 +241,29 @@ export interface RoutineSuggestionSummary { confidence: number; } +// Phase 3: Observability metadata types +export interface TokenMetrics { + prompt_tokens: number; + completion_tokens: number; + thoughts_tokens?: number; + total_tokens: number; +} + +export interface ResponseMetadata { + model_used: string; + duration_ms: number; + reasoning_chain?: string; + token_metrics?: TokenMetrics; +} + export interface RoutineSuggestion { steps: SuggestedStep[]; reasoning: string; summary?: RoutineSuggestionSummary; + // Phase 3: Observability fields + validation_warnings?: string[]; + auto_fixes_applied?: string[]; + metadata?: ResponseMetadata; } export interface DayPlan { @@ -257,6 +276,10 @@ export interface DayPlan { export interface BatchSuggestion { days: DayPlan[]; overall_reasoning: string; + // Phase 3: Observability fields + validation_warnings?: string[]; + auto_fixes_applied?: string[]; + metadata?: ResponseMetadata; } // ─── Shopping suggestion types ─────────────────────────────────────────────── @@ -274,6 +297,10 @@ export interface ProductSuggestion { export interface ShoppingSuggestionResponse { suggestions: ProductSuggestion[]; reasoning: string; + // Phase 3: Observability fields + validation_warnings?: string[]; + auto_fixes_applied?: string[]; + metadata?: ResponseMetadata; } // ─── Health types ────────────────────────────────────────────────────────────