feat(api): add Phase 3 observability - expose validation warnings and metadata to frontend

Backend changes:
- Create ResponseMetadata and TokenMetrics models for API responses
- Modify call_gemini() and call_gemini_with_function_tools() to return (response, log_id) tuple
- Add _build_response_metadata() helper to extract metadata from AICallLog
- Update routines API (/suggest, /suggest-batch) to populate validation_warnings, auto_fixes_applied, and metadata
- Update products API (/suggest) to populate observability fields
- Update skincare API to handle new return signature

Frontend changes:
- Add TypeScript types: TokenMetrics, ResponseMetadata
- Update RoutineSuggestion, BatchSuggestion, ShoppingSuggestionResponse with observability fields

Next: Create UI components to display warnings, reasoning chains, and token metrics
This commit is contained in:
Piotr Oleszczyk 2026-03-06 15:50:28 +01:00
parent 3bf19d8acb
commit 3c3248c2ea
6 changed files with 213 additions and 40 deletions

View file

@ -1,7 +1,7 @@
import json import json
import logging import logging
from datetime import date from datetime import date
from typing import Literal, Optional from typing import Any, Literal, Optional
from uuid import UUID, uuid4 from uuid import UUID, uuid4
from fastapi import APIRouter, Depends, HTTPException, Query from fastapi import APIRouter, Depends, HTTPException, Query
@ -42,6 +42,8 @@ from innercontext.models import (
SkinConcern, SkinConcern,
SkinConditionSnapshot, SkinConditionSnapshot,
) )
from innercontext.models.ai_log import AICallLog
from innercontext.models.api_metadata import ResponseMetadata, TokenMetrics
from innercontext.models.enums import ( from innercontext.models.enums import (
AbsorptionSpeed, AbsorptionSpeed,
DayTime, DayTime,
@ -63,6 +65,37 @@ logger = logging.getLogger(__name__)
router = APIRouter() router = APIRouter()
def _build_response_metadata(session: Session, log_id: Any) -> ResponseMetadata | None:
    """Build ResponseMetadata from AICallLog for Phase 3 observability.

    Args:
        session: Active database session used to load the log row.
        log_id: Primary key of the AICallLog entry (a UUID), or a falsy
            value when the call was not logged.

    Returns:
        A populated ResponseMetadata, or None when no log row is available.
    """
    if not log_id:
        return None
    log = session.get(AICallLog, log_id)
    if log is None:
        return None

    # Token metrics are only attached when the provider reported all three
    # core counts; thoughts_tokens remains optional either way.
    counts = (log.prompt_tokens, log.completion_tokens, log.total_tokens)
    metrics = None
    if all(count is not None for count in counts):
        metrics = TokenMetrics(
            prompt_tokens=counts[0],
            completion_tokens=counts[1],
            thoughts_tokens=log.thoughts_tokens,
            total_tokens=counts[2],
        )

    return ResponseMetadata(
        model_used=log.model,
        duration_ms=log.duration_ms or 0,  # fall back to 0 when unrecorded
        reasoning_chain=log.reasoning_chain,
        token_metrics=metrics,
    )
PricingSource = Literal["category", "fallback", "insufficient_data"] PricingSource = Literal["category", "fallback", "insufficient_data"]
PricingOutput = tuple[PriceTier | None, float | None, PricingSource | None] PricingOutput = tuple[PriceTier | None, float | None, PricingSource | None]
PricingOutputs = dict[UUID, PricingOutput] PricingOutputs = dict[UUID, PricingOutput]
@ -234,6 +267,10 @@ class ProductSuggestion(PydanticBase):
class ShoppingSuggestionResponse(PydanticBase): class ShoppingSuggestionResponse(PydanticBase):
suggestions: list[ProductSuggestion] suggestions: list[ProductSuggestion]
reasoning: str reasoning: str
# Phase 3: Observability fields
validation_warnings: list[str] | None = None
auto_fixes_applied: list[str] | None = None
metadata: "ResponseMetadata | None" = None
class _ProductSuggestionOut(PydanticBase): class _ProductSuggestionOut(PydanticBase):
@ -609,7 +646,7 @@ def parse_product_text(data: ProductParseRequest) -> ProductParseResponse:
# Phase 1: Sanitize input text # Phase 1: Sanitize input text
sanitized_text = sanitize_user_input(data.text, max_length=10000) sanitized_text = sanitize_user_input(data.text, max_length=10000)
response = call_gemini( response, log_id = call_gemini(
endpoint="products/parse-text", endpoint="products/parse-text",
contents=f"Extract product data from this text:\n\n{sanitized_text}", contents=f"Extract product data from this text:\n\n{sanitized_text}",
config=get_extraction_config( config=get_extraction_config(
@ -997,7 +1034,7 @@ def suggest_shopping(session: Session = Depends(get_session)):
} }
try: try:
response = call_gemini_with_function_tools( response, log_id = call_gemini_with_function_tools(
endpoint="products/suggest", endpoint="products/suggest",
contents=prompt, contents=prompt,
config=config, config=config,
@ -1020,7 +1057,7 @@ def suggest_shopping(session: Session = Depends(get_session)):
"- Zasugeruj tylko najbardziej bezpieczne i realistyczne typy produktow do uzupelnienia brakow," "- Zasugeruj tylko najbardziej bezpieczne i realistyczne typy produktow do uzupelnienia brakow,"
" unikaj agresywnych aktywnych przy niepelnych danych.\n" " unikaj agresywnych aktywnych przy niepelnych danych.\n"
) )
response = call_gemini( response, log_id = call_gemini(
endpoint="products/suggest", endpoint="products/suggest",
contents=conservative_prompt, contents=conservative_prompt,
config=get_creative_config( config=get_creative_config(
@ -1044,12 +1081,6 @@ def suggest_shopping(session: Session = Depends(get_session)):
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
raise HTTPException(status_code=502, detail=f"LLM returned invalid JSON: {e}") raise HTTPException(status_code=502, detail=f"LLM returned invalid JSON: {e}")
shopping_response = ShoppingSuggestionResponse(
suggestions=[ProductSuggestion(**s) for s in parsed.get("suggestions", [])],
reasoning=parsed.get("reasoning", ""),
)
# Phase 1: Validate the shopping suggestions
# Get products with inventory (those user already owns) # Get products with inventory (those user already owns)
products_with_inventory = session.exec( products_with_inventory = session.exec(
select(Product).join(ProductInventory).distinct() select(Product).join(ProductInventory).distinct()
@ -1061,7 +1092,15 @@ def suggest_shopping(session: Session = Depends(get_session)):
valid_targets=set(SkinConcern), valid_targets=set(SkinConcern),
) )
# Phase 1: Validate the shopping suggestions
validator = ShoppingValidator() validator = ShoppingValidator()
# Build initial shopping response without metadata
shopping_response = ShoppingSuggestionResponse(
suggestions=[ProductSuggestion(**s) for s in parsed.get("suggestions", [])],
reasoning=parsed.get("reasoning", ""),
)
validation_result = validator.validate(shopping_response, shopping_context) validation_result = validator.validate(shopping_response, shopping_context)
if not validation_result.is_valid: if not validation_result.is_valid:
@ -1073,7 +1112,14 @@ def suggest_shopping(session: Session = Depends(get_session)):
detail=f"Generated shopping suggestions failed validation: {'; '.join(validation_result.errors)}", detail=f"Generated shopping suggestions failed validation: {'; '.join(validation_result.errors)}",
) )
# Phase 3: Add warnings, auto-fixes, and metadata to response
if validation_result.warnings: if validation_result.warnings:
logger.warning(f"Shopping suggestion warnings: {validation_result.warnings}") logger.warning(f"Shopping suggestion warnings: {validation_result.warnings}")
shopping_response.validation_warnings = validation_result.warnings
if validation_result.auto_fixes:
shopping_response.auto_fixes_applied = validation_result.auto_fixes
shopping_response.metadata = _build_response_metadata(session, log_id)
return shopping_response return shopping_response

View file

@ -2,7 +2,7 @@ import json
import logging import logging
import math import math
from datetime import date, timedelta from datetime import date, timedelta
from typing import Optional from typing import Any, Optional
from uuid import UUID, uuid4 from uuid import UUID, uuid4
from fastapi import APIRouter, Depends, HTTPException from fastapi import APIRouter, Depends, HTTPException
@ -40,6 +40,8 @@ from innercontext.models import (
RoutineStep, RoutineStep,
SkinConditionSnapshot, SkinConditionSnapshot,
) )
from innercontext.models.ai_log import AICallLog
from innercontext.models.api_metadata import ResponseMetadata, TokenMetrics
from innercontext.models.enums import GroomingAction, PartOfDay from innercontext.models.enums import GroomingAction, PartOfDay
from innercontext.validators import BatchValidator, RoutineSuggestionValidator from innercontext.validators import BatchValidator, RoutineSuggestionValidator
from innercontext.validators.batch_validator import BatchValidationContext from innercontext.validators.batch_validator import BatchValidationContext
@ -47,6 +49,37 @@ from innercontext.validators.routine_validator import RoutineValidationContext
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _build_response_metadata(session: Session, log_id: Any) -> ResponseMetadata | None:
    """Build ResponseMetadata from AICallLog for Phase 3 observability.

    Looks up the AICallLog row identified by ``log_id`` and converts it into
    the API-facing metadata model. Returns None when ``log_id`` is falsy or
    the row cannot be found.
    """
    log = session.get(AICallLog, log_id) if log_id else None
    if not log:
        return None

    # All three core counts must be present before we report token usage;
    # thoughts_tokens may legitimately be None.
    has_full_token_counts = (
        log.prompt_tokens is not None
        and log.completion_tokens is not None
        and log.total_tokens is not None
    )
    token_metrics = (
        TokenMetrics(
            prompt_tokens=log.prompt_tokens,
            completion_tokens=log.completion_tokens,
            thoughts_tokens=log.thoughts_tokens,
            total_tokens=log.total_tokens,
        )
        if has_full_token_counts
        else None
    )

    return ResponseMetadata(
        model_used=log.model,
        duration_ms=log.duration_ms or 0,  # 0 when the duration was not recorded
        reasoning_chain=log.reasoning_chain,
        token_metrics=token_metrics,
    )
router = APIRouter() router = APIRouter()
@ -124,6 +157,10 @@ class RoutineSuggestion(SQLModel):
steps: list[SuggestedStep] steps: list[SuggestedStep]
reasoning: str reasoning: str
summary: Optional[RoutineSuggestionSummary] = None summary: Optional[RoutineSuggestionSummary] = None
# Phase 3: Observability fields
validation_warnings: Optional[list[str]] = None
auto_fixes_applied: Optional[list[str]] = None
metadata: Optional[ResponseMetadata] = None
class SuggestBatchRequest(SQLModel): class SuggestBatchRequest(SQLModel):
@ -144,6 +181,10 @@ class DayPlan(SQLModel):
class BatchSuggestion(SQLModel): class BatchSuggestion(SQLModel):
days: list[DayPlan] days: list[DayPlan]
overall_reasoning: str overall_reasoning: str
# Phase 3: Observability fields
validation_warnings: Optional[list[str]] = None
auto_fixes_applied: Optional[list[str]] = None
metadata: Optional[ResponseMetadata] = None
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -674,7 +715,7 @@ def suggest_routine(
} }
try: try:
response = call_gemini_with_function_tools( response, log_id = call_gemini_with_function_tools(
endpoint="routines/suggest", endpoint="routines/suggest",
contents=prompt, contents=prompt,
config=config, config=config,
@ -698,7 +739,7 @@ def suggest_routine(
" preferujac lagodne produkty wspierajace bariere i fotoprotekcje.\n" " preferujac lagodne produkty wspierajace bariere i fotoprotekcje.\n"
"- Gdy masz watpliwosci, pomijaj ryzykowne aktywne kroki.\n" "- Gdy masz watpliwosci, pomijaj ryzykowne aktywne kroki.\n"
) )
response = call_gemini( response, log_id = call_gemini(
endpoint="routines/suggest", endpoint="routines/suggest",
contents=conservative_prompt, contents=conservative_prompt,
config=get_creative_config( config=get_creative_config(
@ -760,13 +801,6 @@ def suggest_routine(
confidence=confidence, confidence=confidence,
) )
# Phase 1: Validate the response
suggestion = RoutineSuggestion(
steps=steps,
reasoning=parsed.get("reasoning", ""),
summary=summary,
)
# Get skin snapshot for barrier state # Get skin snapshot for barrier state
stmt = select(SkinConditionSnapshot).order_by( stmt = select(SkinConditionSnapshot).order_by(
col(SkinConditionSnapshot.snapshot_date).desc() col(SkinConditionSnapshot.snapshot_date).desc()
@ -790,8 +824,16 @@ def suggest_routine(
just_shaved=False, # Could be enhanced with grooming context just_shaved=False, # Could be enhanced with grooming context
) )
# Validate # Phase 1: Validate the response
validator = RoutineSuggestionValidator() validator = RoutineSuggestionValidator()
# Build initial suggestion without metadata
suggestion = RoutineSuggestion(
steps=steps,
reasoning=parsed.get("reasoning", ""),
summary=summary,
)
validation_result = validator.validate(suggestion, validation_context) validation_result = validator.validate(suggestion, validation_context)
if not validation_result.is_valid: if not validation_result.is_valid:
@ -805,10 +847,15 @@ def suggest_routine(
detail=f"Generated routine failed safety validation: {'; '.join(validation_result.errors)}", detail=f"Generated routine failed safety validation: {'; '.join(validation_result.errors)}",
) )
# Add warnings to response if any # Phase 3: Add warnings, auto-fixes, and metadata to response
if validation_result.warnings: if validation_result.warnings:
logger.warning(f"Routine suggestion warnings: {validation_result.warnings}") logger.warning(f"Routine suggestion warnings: {validation_result.warnings}")
# Note: We'll add warnings field to RoutineSuggestion model in a moment suggestion.validation_warnings = validation_result.warnings
if validation_result.auto_fixes:
suggestion.auto_fixes_applied = validation_result.auto_fixes
suggestion.metadata = _build_response_metadata(session, log_id)
return suggestion return suggestion
@ -878,7 +925,7 @@ def suggest_batch(
"\nZwróć JSON zgodny ze schematem." "\nZwróć JSON zgodny ze schematem."
) )
response = call_gemini( response, log_id = call_gemini(
endpoint="routines/suggest-batch", endpoint="routines/suggest-batch",
contents=prompt, contents=prompt,
config=get_creative_config( config=get_creative_config(
@ -936,11 +983,6 @@ def suggest_batch(
) )
) )
batch_suggestion = BatchSuggestion(
days=days, overall_reasoning=parsed.get("overall_reasoning", "")
)
# Phase 1: Validate the batch response
# Get skin snapshot for barrier state # Get skin snapshot for barrier state
stmt = select(SkinConditionSnapshot).order_by( stmt = select(SkinConditionSnapshot).order_by(
col(SkinConditionSnapshot.snapshot_date).desc() col(SkinConditionSnapshot.snapshot_date).desc()
@ -964,8 +1006,14 @@ def suggest_batch(
last_used_dates=last_used_dates_by_uuid, last_used_dates=last_used_dates_by_uuid,
) )
# Validate # Phase 1: Validate the batch response
batch_validator = BatchValidator() batch_validator = BatchValidator()
# Build initial batch suggestion without metadata
batch_suggestion = BatchSuggestion(
days=days, overall_reasoning=parsed.get("overall_reasoning", "")
)
validation_result = batch_validator.validate(batch_suggestion, batch_context) validation_result = batch_validator.validate(batch_suggestion, batch_context)
if not validation_result.is_valid: if not validation_result.is_valid:
@ -977,9 +1025,15 @@ def suggest_batch(
detail=f"Generated batch plan failed safety validation: {'; '.join(validation_result.errors)}", detail=f"Generated batch plan failed safety validation: {'; '.join(validation_result.errors)}",
) )
# Log warnings if any # Phase 3: Add warnings, auto-fixes, and metadata to response
if validation_result.warnings: if validation_result.warnings:
logger.warning(f"Batch routine warnings: {validation_result.warnings}") logger.warning(f"Batch routine warnings: {validation_result.warnings}")
batch_suggestion.validation_warnings = validation_result.warnings
if validation_result.auto_fixes:
batch_suggestion.auto_fixes_applied = validation_result.auto_fixes
batch_suggestion.metadata = _build_response_metadata(session, log_id)
return batch_suggestion return batch_suggestion

View file

@ -179,7 +179,7 @@ async def analyze_skin_photos(
) )
image_summary = f"{len(photos)} image(s): {', '.join((p.content_type or 'unknown') for p in photos)}" image_summary = f"{len(photos)} image(s): {', '.join((p.content_type or 'unknown') for p in photos)}"
response = call_gemini( response, log_id = call_gemini(
endpoint="skincare/analyze-photos", endpoint="skincare/analyze-photos",
contents=parts, contents=parts,
config=get_extraction_config( config=get_extraction_config(

View file

@ -109,8 +109,12 @@ def call_gemini(
config: genai_types.GenerateContentConfig, config: genai_types.GenerateContentConfig,
user_input: str | None = None, user_input: str | None = None,
tool_trace: dict[str, Any] | None = None, tool_trace: dict[str, Any] | None = None,
): ) -> tuple[Any, Any]:
"""Call Gemini, log full request + response to DB, return response unchanged.""" """Call Gemini, log full request + response to DB.
Returns:
Tuple of (response, log_id) where log_id is the AICallLog.id (UUID) or None if logging failed.
"""
from sqlmodel import Session from sqlmodel import Session
from db import engine # deferred to avoid circular import at module load from db import engine # deferred to avoid circular import at module load
@ -127,7 +131,13 @@ def call_gemini(
user_input = str(contents) user_input = str(contents)
start = time.monotonic() start = time.monotonic()
success, error_detail, response, finish_reason = True, None, None, None success, error_detail, response, finish_reason, log_id = (
True,
None,
None,
None,
None,
)
try: try:
response = client.models.generate_content( response = client.models.generate_content(
model=model, contents=contents, config=config model=model, contents=contents, config=config
@ -199,7 +209,9 @@ def call_gemini(
with Session(engine) as s: with Session(engine) as s:
s.add(log) s.add(log)
s.commit() s.commit()
return response s.refresh(log)
log_id = log.id
return response, log_id
def call_gemini_with_function_tools( def call_gemini_with_function_tools(
@ -210,17 +222,22 @@ def call_gemini_with_function_tools(
function_handlers: dict[str, Callable[[dict[str, Any]], dict[str, Any]]], function_handlers: dict[str, Callable[[dict[str, Any]], dict[str, Any]]],
user_input: str | None = None, user_input: str | None = None,
max_tool_roundtrips: int = 2, max_tool_roundtrips: int = 2,
): ) -> tuple[Any, Any]:
"""Call Gemini with function-calling loop until final response text is produced.""" """Call Gemini with function-calling loop until final response text is produced.
Returns:
Tuple of (response, log_id) where log_id is the AICallLog.id (UUID) of the final call.
"""
if max_tool_roundtrips < 0: if max_tool_roundtrips < 0:
raise ValueError("max_tool_roundtrips must be >= 0") raise ValueError("max_tool_roundtrips must be >= 0")
history = list(contents) if isinstance(contents, list) else [contents] history = list(contents) if isinstance(contents, list) else [contents]
rounds = 0 rounds = 0
trace_events: list[dict[str, Any]] = [] trace_events: list[dict[str, Any]] = []
log_id = None
while True: while True:
response = call_gemini( response, log_id = call_gemini(
endpoint=endpoint, endpoint=endpoint,
contents=history, contents=history,
config=config, config=config,
@ -233,7 +250,7 @@ def call_gemini_with_function_tools(
) )
function_calls = list(getattr(response, "function_calls", None) or []) function_calls = list(getattr(response, "function_calls", None) or [])
if not function_calls: if not function_calls:
return response return response, log_id
if rounds >= max_tool_roundtrips: if rounds >= max_tool_roundtrips:
raise HTTPException( raise HTTPException(

View file

@ -0,0 +1,29 @@
"""Models for API response metadata (Phase 3: UI/UX Observability)."""
from pydantic import BaseModel
class TokenMetrics(BaseModel):
"""Token usage metrics from LLM call."""
prompt_tokens: int
completion_tokens: int
thoughts_tokens: int | None = None
total_tokens: int
class ResponseMetadata(BaseModel):
"""Metadata about the LLM response for observability."""
model_used: str
duration_ms: int
reasoning_chain: str | None = None
token_metrics: TokenMetrics | None = None
class EnrichedResponse(BaseModel):
"""Base class for API responses with validation and metadata."""
validation_warnings: list[str] | None = None
auto_fixes_applied: list[str] | None = None
metadata: ResponseMetadata | None = None

View file

@ -241,10 +241,29 @@ export interface RoutineSuggestionSummary {
confidence: number; confidence: number;
} }
// Phase 3: Observability metadata types
/** Token usage metrics reported by the LLM provider for one call. */
export interface TokenMetrics {
  /** Tokens consumed by the prompt. */
  prompt_tokens: number;
  /** Tokens generated in the completion. */
  completion_tokens: number;
  /** Internal reasoning tokens; omitted when the model does not report them. */
  thoughts_tokens?: number;
  /** Total tokens reported for the call. */
  total_tokens: number;
}
/** Observability metadata attached to AI-generated API responses. */
export interface ResponseMetadata {
  /** Identifier of the model that produced the response. */
  model_used: string;
  /** Wall-clock duration of the LLM call in milliseconds. */
  duration_ms: number;
  /** Optional reasoning trace captured from the model. */
  reasoning_chain?: string;
  /** Token usage; omitted when the provider did not report full counts. */
  token_metrics?: TokenMetrics;
}
export interface RoutineSuggestion { export interface RoutineSuggestion {
steps: SuggestedStep[]; steps: SuggestedStep[];
reasoning: string; reasoning: string;
summary?: RoutineSuggestionSummary; summary?: RoutineSuggestionSummary;
// Phase 3: Observability fields
validation_warnings?: string[];
auto_fixes_applied?: string[];
metadata?: ResponseMetadata;
} }
export interface DayPlan { export interface DayPlan {
@ -257,6 +276,10 @@ export interface DayPlan {
export interface BatchSuggestion { export interface BatchSuggestion {
days: DayPlan[]; days: DayPlan[];
overall_reasoning: string; overall_reasoning: string;
// Phase 3: Observability fields
validation_warnings?: string[];
auto_fixes_applied?: string[];
metadata?: ResponseMetadata;
} }
// ─── Shopping suggestion types ─────────────────────────────────────────────── // ─── Shopping suggestion types ───────────────────────────────────────────────
@ -274,6 +297,10 @@ export interface ProductSuggestion {
export interface ShoppingSuggestionResponse { export interface ShoppingSuggestionResponse {
suggestions: ProductSuggestion[]; suggestions: ProductSuggestion[];
reasoning: string; reasoning: string;
// Phase 3: Observability fields
validation_warnings?: string[];
auto_fixes_applied?: string[];
metadata?: ResponseMetadata;
} }
// ─── Health types ──────────────────────────────────────────────────────────── // ─── Health types ────────────────────────────────────────────────────────────