feat(api): add Phase 3 observability - expose validation warnings and metadata to frontend

Backend changes:
- Create ResponseMetadata and TokenMetrics models for API responses
- Modify call_gemini() and call_gemini_with_function_tools() to return (response, log_id) tuple
- Add _build_response_metadata() helper to extract metadata from AICallLog
- Update routines API (/suggest, /suggest-batch) to populate validation_warnings, auto_fixes_applied, and metadata
- Update products API (/suggest) to populate observability fields
- Update skincare API to handle new return signature

Frontend changes:
- Add TypeScript types: TokenMetrics, ResponseMetadata
- Update RoutineSuggestion, BatchSuggestion, ShoppingSuggestionResponse with observability fields

Next: Create UI components to display warnings, reasoning chains, and token metrics
This commit is contained in:
Piotr Oleszczyk 2026-03-06 15:50:28 +01:00
parent 3bf19d8acb
commit 3c3248c2ea
6 changed files with 213 additions and 40 deletions

View file

@ -1,7 +1,7 @@
import json
import logging
from datetime import date
from typing import Literal, Optional
from typing import Any, Literal, Optional
from uuid import UUID, uuid4
from fastapi import APIRouter, Depends, HTTPException, Query
@ -42,6 +42,8 @@ from innercontext.models import (
SkinConcern,
SkinConditionSnapshot,
)
from innercontext.models.ai_log import AICallLog
from innercontext.models.api_metadata import ResponseMetadata, TokenMetrics
from innercontext.models.enums import (
AbsorptionSpeed,
DayTime,
@ -63,6 +65,37 @@ logger = logging.getLogger(__name__)
router = APIRouter()
def _build_response_metadata(session: Session, log_id: Any) -> ResponseMetadata | None:
    """Assemble Phase 3 observability metadata from a persisted AICallLog row.

    Args:
        session: Active DB session used to fetch the log entry.
        log_id: Primary key of the AICallLog row, or a falsy value when
            logging was skipped/failed upstream.

    Returns:
        A populated ResponseMetadata, or None when no log can be resolved.
    """
    if not log_id:
        return None

    call_log = session.get(AICallLog, log_id)
    if not call_log:
        return None

    # Token metrics are only attached when all three core counters were
    # recorded; thoughts_tokens alone is allowed to be absent.
    core_counters = (
        call_log.prompt_tokens,
        call_log.completion_tokens,
        call_log.total_tokens,
    )
    usage = None
    if all(counter is not None for counter in core_counters):
        usage = TokenMetrics(
            prompt_tokens=call_log.prompt_tokens,
            completion_tokens=call_log.completion_tokens,
            thoughts_tokens=call_log.thoughts_tokens,
            total_tokens=call_log.total_tokens,
        )

    return ResponseMetadata(
        model_used=call_log.model,
        duration_ms=call_log.duration_ms or 0,
        reasoning_chain=call_log.reasoning_chain,
        token_metrics=usage,
    )
PricingSource = Literal["category", "fallback", "insufficient_data"]
PricingOutput = tuple[PriceTier | None, float | None, PricingSource | None]
PricingOutputs = dict[UUID, PricingOutput]
@ -234,6 +267,10 @@ class ProductSuggestion(PydanticBase):
class ShoppingSuggestionResponse(PydanticBase):
    """Shopping suggestions produced by the LLM, plus Phase 3 observability data."""

    suggestions: list[ProductSuggestion]
    reasoning: str
    # Phase 3: Observability fields
    # Non-fatal issues raised by the shopping validator, surfaced to the UI.
    validation_warnings: list[str] | None = None
    # Human-readable descriptions of fixes the validator applied automatically.
    auto_fixes_applied: list[str] | None = None
    # Model/duration/token metadata built from the persisted AI call log.
    # (Quotes removed from the annotation: ResponseMetadata is imported at
    # module top, so a forward reference is unnecessary.)
    metadata: ResponseMetadata | None = None
class _ProductSuggestionOut(PydanticBase):
@ -609,7 +646,7 @@ def parse_product_text(data: ProductParseRequest) -> ProductParseResponse:
# Phase 1: Sanitize input text
sanitized_text = sanitize_user_input(data.text, max_length=10000)
response = call_gemini(
response, log_id = call_gemini(
endpoint="products/parse-text",
contents=f"Extract product data from this text:\n\n{sanitized_text}",
config=get_extraction_config(
@ -997,7 +1034,7 @@ def suggest_shopping(session: Session = Depends(get_session)):
}
try:
response = call_gemini_with_function_tools(
response, log_id = call_gemini_with_function_tools(
endpoint="products/suggest",
contents=prompt,
config=config,
@ -1020,7 +1057,7 @@ def suggest_shopping(session: Session = Depends(get_session)):
"- Zasugeruj tylko najbardziej bezpieczne i realistyczne typy produktow do uzupelnienia brakow,"
" unikaj agresywnych aktywnych przy niepelnych danych.\n"
)
response = call_gemini(
response, log_id = call_gemini(
endpoint="products/suggest",
contents=conservative_prompt,
config=get_creative_config(
@ -1044,12 +1081,6 @@ def suggest_shopping(session: Session = Depends(get_session)):
except json.JSONDecodeError as e:
raise HTTPException(status_code=502, detail=f"LLM returned invalid JSON: {e}")
shopping_response = ShoppingSuggestionResponse(
suggestions=[ProductSuggestion(**s) for s in parsed.get("suggestions", [])],
reasoning=parsed.get("reasoning", ""),
)
# Phase 1: Validate the shopping suggestions
# Get products with inventory (those user already owns)
products_with_inventory = session.exec(
select(Product).join(ProductInventory).distinct()
@ -1061,7 +1092,15 @@ def suggest_shopping(session: Session = Depends(get_session)):
valid_targets=set(SkinConcern),
)
# Phase 1: Validate the shopping suggestions
validator = ShoppingValidator()
# Build initial shopping response without metadata
shopping_response = ShoppingSuggestionResponse(
suggestions=[ProductSuggestion(**s) for s in parsed.get("suggestions", [])],
reasoning=parsed.get("reasoning", ""),
)
validation_result = validator.validate(shopping_response, shopping_context)
if not validation_result.is_valid:
@ -1073,7 +1112,14 @@ def suggest_shopping(session: Session = Depends(get_session)):
detail=f"Generated shopping suggestions failed validation: {'; '.join(validation_result.errors)}",
)
# Phase 3: Add warnings, auto-fixes, and metadata to response
if validation_result.warnings:
logger.warning(f"Shopping suggestion warnings: {validation_result.warnings}")
shopping_response.validation_warnings = validation_result.warnings
if validation_result.auto_fixes:
shopping_response.auto_fixes_applied = validation_result.auto_fixes
shopping_response.metadata = _build_response_metadata(session, log_id)
return shopping_response

View file

@ -2,7 +2,7 @@ import json
import logging
import math
from datetime import date, timedelta
from typing import Optional
from typing import Any, Optional
from uuid import UUID, uuid4
from fastapi import APIRouter, Depends, HTTPException
@ -40,6 +40,8 @@ from innercontext.models import (
RoutineStep,
SkinConditionSnapshot,
)
from innercontext.models.ai_log import AICallLog
from innercontext.models.api_metadata import ResponseMetadata, TokenMetrics
from innercontext.models.enums import GroomingAction, PartOfDay
from innercontext.validators import BatchValidator, RoutineSuggestionValidator
from innercontext.validators.batch_validator import BatchValidationContext
@ -47,6 +49,37 @@ from innercontext.validators.routine_validator import RoutineValidationContext
logger = logging.getLogger(__name__)
def _build_response_metadata(session: Session, log_id: Any) -> ResponseMetadata | None:
    """Build ResponseMetadata from AICallLog for Phase 3 observability.

    Args:
        session: Active DB session used to load the persisted call log.
        log_id: Primary key of the AICallLog row; the call_gemini helpers
            return None here when logging failed, which yields None.

    Returns:
        ResponseMetadata with model, duration, reasoning chain and optional
        token metrics, or None when no log id was given or the row is gone.
    """
    # NOTE(review): this helper is duplicated verbatim in the products API
    # module — consider extracting it to a shared module.
    if not log_id:
        return None
    log = session.get(AICallLog, log_id)
    if not log:
        return None
    # Only attach token metrics when all three core counters were recorded;
    # thoughts_tokens is allowed to remain None.
    token_metrics = None
    if (
        log.prompt_tokens is not None
        and log.completion_tokens is not None
        and log.total_tokens is not None
    ):
        token_metrics = TokenMetrics(
            prompt_tokens=log.prompt_tokens,
            completion_tokens=log.completion_tokens,
            thoughts_tokens=log.thoughts_tokens,
            total_tokens=log.total_tokens,
        )
    return ResponseMetadata(
        model_used=log.model,
        duration_ms=log.duration_ms or 0,  # fall back to 0 if duration missing
        reasoning_chain=log.reasoning_chain,
        token_metrics=token_metrics,
    )
router = APIRouter()
@ -124,6 +157,10 @@ class RoutineSuggestion(SQLModel):
steps: list[SuggestedStep]
reasoning: str
summary: Optional[RoutineSuggestionSummary] = None
# Phase 3: Observability fields
validation_warnings: Optional[list[str]] = None
auto_fixes_applied: Optional[list[str]] = None
metadata: Optional[ResponseMetadata] = None
class SuggestBatchRequest(SQLModel):
@ -144,6 +181,10 @@ class DayPlan(SQLModel):
class BatchSuggestion(SQLModel):
    """Multi-day routine plan generated by the LLM, with observability data."""

    days: list[DayPlan]
    overall_reasoning: str
    # Phase 3: Observability fields
    # Warnings from the batch validator (non-fatal, shown to the user).
    validation_warnings: Optional[list[str]] = None
    # Fixes the validator applied automatically to the plan.
    auto_fixes_applied: Optional[list[str]] = None
    # NOTE(review): SQLModel classes expose a class-level `metadata`
    # (SQLAlchemy MetaData) — confirm a field named `metadata` on a
    # non-table SQLModel does not clash at runtime.
    metadata: Optional[ResponseMetadata] = None
# ---------------------------------------------------------------------------
@ -674,7 +715,7 @@ def suggest_routine(
}
try:
response = call_gemini_with_function_tools(
response, log_id = call_gemini_with_function_tools(
endpoint="routines/suggest",
contents=prompt,
config=config,
@ -698,7 +739,7 @@ def suggest_routine(
" preferujac lagodne produkty wspierajace bariere i fotoprotekcje.\n"
"- Gdy masz watpliwosci, pomijaj ryzykowne aktywne kroki.\n"
)
response = call_gemini(
response, log_id = call_gemini(
endpoint="routines/suggest",
contents=conservative_prompt,
config=get_creative_config(
@ -760,13 +801,6 @@ def suggest_routine(
confidence=confidence,
)
# Phase 1: Validate the response
suggestion = RoutineSuggestion(
steps=steps,
reasoning=parsed.get("reasoning", ""),
summary=summary,
)
# Get skin snapshot for barrier state
stmt = select(SkinConditionSnapshot).order_by(
col(SkinConditionSnapshot.snapshot_date).desc()
@ -790,8 +824,16 @@ def suggest_routine(
just_shaved=False, # Could be enhanced with grooming context
)
# Validate
# Phase 1: Validate the response
validator = RoutineSuggestionValidator()
# Build initial suggestion without metadata
suggestion = RoutineSuggestion(
steps=steps,
reasoning=parsed.get("reasoning", ""),
summary=summary,
)
validation_result = validator.validate(suggestion, validation_context)
if not validation_result.is_valid:
@ -805,10 +847,15 @@ def suggest_routine(
detail=f"Generated routine failed safety validation: {'; '.join(validation_result.errors)}",
)
# Add warnings to response if any
# Phase 3: Add warnings, auto-fixes, and metadata to response
if validation_result.warnings:
logger.warning(f"Routine suggestion warnings: {validation_result.warnings}")
# Note: We'll add warnings field to RoutineSuggestion model in a moment
suggestion.validation_warnings = validation_result.warnings
if validation_result.auto_fixes:
suggestion.auto_fixes_applied = validation_result.auto_fixes
suggestion.metadata = _build_response_metadata(session, log_id)
return suggestion
@ -878,7 +925,7 @@ def suggest_batch(
"\nZwróć JSON zgodny ze schematem."
)
response = call_gemini(
response, log_id = call_gemini(
endpoint="routines/suggest-batch",
contents=prompt,
config=get_creative_config(
@ -936,11 +983,6 @@ def suggest_batch(
)
)
batch_suggestion = BatchSuggestion(
days=days, overall_reasoning=parsed.get("overall_reasoning", "")
)
# Phase 1: Validate the batch response
# Get skin snapshot for barrier state
stmt = select(SkinConditionSnapshot).order_by(
col(SkinConditionSnapshot.snapshot_date).desc()
@ -964,8 +1006,14 @@ def suggest_batch(
last_used_dates=last_used_dates_by_uuid,
)
# Validate
# Phase 1: Validate the batch response
batch_validator = BatchValidator()
# Build initial batch suggestion without metadata
batch_suggestion = BatchSuggestion(
days=days, overall_reasoning=parsed.get("overall_reasoning", "")
)
validation_result = batch_validator.validate(batch_suggestion, batch_context)
if not validation_result.is_valid:
@ -977,9 +1025,15 @@ def suggest_batch(
detail=f"Generated batch plan failed safety validation: {'; '.join(validation_result.errors)}",
)
# Log warnings if any
# Phase 3: Add warnings, auto-fixes, and metadata to response
if validation_result.warnings:
logger.warning(f"Batch routine warnings: {validation_result.warnings}")
batch_suggestion.validation_warnings = validation_result.warnings
if validation_result.auto_fixes:
batch_suggestion.auto_fixes_applied = validation_result.auto_fixes
batch_suggestion.metadata = _build_response_metadata(session, log_id)
return batch_suggestion

View file

@ -179,7 +179,7 @@ async def analyze_skin_photos(
)
image_summary = f"{len(photos)} image(s): {', '.join((p.content_type or 'unknown') for p in photos)}"
response = call_gemini(
response, log_id = call_gemini(
endpoint="skincare/analyze-photos",
contents=parts,
config=get_extraction_config(

View file

@ -109,8 +109,12 @@ def call_gemini(
config: genai_types.GenerateContentConfig,
user_input: str | None = None,
tool_trace: dict[str, Any] | None = None,
):
"""Call Gemini, log full request + response to DB, return response unchanged."""
) -> tuple[Any, Any]:
"""Call Gemini, log full request + response to DB.
Returns:
Tuple of (response, log_id) where log_id is the AICallLog.id (UUID) or None if logging failed.
"""
from sqlmodel import Session
from db import engine # deferred to avoid circular import at module load
@ -127,7 +131,13 @@ def call_gemini(
user_input = str(contents)
start = time.monotonic()
success, error_detail, response, finish_reason = True, None, None, None
success, error_detail, response, finish_reason, log_id = (
True,
None,
None,
None,
None,
)
try:
response = client.models.generate_content(
model=model, contents=contents, config=config
@ -199,7 +209,9 @@ def call_gemini(
with Session(engine) as s:
s.add(log)
s.commit()
return response
s.refresh(log)
log_id = log.id
return response, log_id
def call_gemini_with_function_tools(
@ -210,17 +222,22 @@ def call_gemini_with_function_tools(
function_handlers: dict[str, Callable[[dict[str, Any]], dict[str, Any]]],
user_input: str | None = None,
max_tool_roundtrips: int = 2,
):
"""Call Gemini with function-calling loop until final response text is produced."""
) -> tuple[Any, Any]:
"""Call Gemini with function-calling loop until final response text is produced.
Returns:
Tuple of (response, log_id) where log_id is the AICallLog.id (UUID) of the final call.
"""
if max_tool_roundtrips < 0:
raise ValueError("max_tool_roundtrips must be >= 0")
history = list(contents) if isinstance(contents, list) else [contents]
rounds = 0
trace_events: list[dict[str, Any]] = []
log_id = None
while True:
response = call_gemini(
response, log_id = call_gemini(
endpoint=endpoint,
contents=history,
config=config,
@ -233,7 +250,7 @@ def call_gemini_with_function_tools(
)
function_calls = list(getattr(response, "function_calls", None) or [])
if not function_calls:
return response
return response, log_id
if rounds >= max_tool_roundtrips:
raise HTTPException(

View file

@ -0,0 +1,29 @@
"""Models for API response metadata (Phase 3: UI/UX Observability)."""
from pydantic import BaseModel
class TokenMetrics(BaseModel):
    """Token usage metrics from LLM call."""

    # Tokens consumed by the prompt/input.
    prompt_tokens: int
    # Tokens produced in the visible completion.
    completion_tokens: int
    # Reasoning/"thinking" tokens; None when none were reported.
    thoughts_tokens: int | None = None
    # Overall token count for the call.
    total_tokens: int
class ResponseMetadata(BaseModel):
    """Metadata about the LLM response for observability."""

    # Identifier of the model that served the request.
    model_used: str
    # Duration of the call in milliseconds.
    duration_ms: int
    # Reasoning trace captured from the model, if available.
    reasoning_chain: str | None = None
    # Token usage; None when counters were not recorded on the call log.
    token_metrics: TokenMetrics | None = None
class EnrichedResponse(BaseModel):
    """Base class for API responses with validation and metadata."""

    # NOTE(review): in this change set the response models (shopping/routine/
    # batch) declare these fields inline rather than inheriting from this
    # class — consider consolidating on this base to avoid drift.
    validation_warnings: list[str] | None = None
    auto_fixes_applied: list[str] | None = None
    metadata: ResponseMetadata | None = None