feat(api): add LLM response validation and input sanitization

Implement Phase 1: Safety & Validation for all LLM-based suggestion engines.

- Add input sanitization module to prevent prompt injection attacks
- Implement 5 comprehensive validators (routine, batch, shopping, product parse, photo)
- Add 10+ critical safety checks (retinoid+acid conflicts, barrier compatibility, etc.)
- Integrate validation into all 5 API endpoints (routines, products, skincare)
- Add validation fields to ai_call_logs table (validation_errors, validation_warnings, auto_fixed)
- Create database migration for validation fields
- Add comprehensive test suite (9/9 tests passing, 88% coverage on validators)

Safety improvements:
- Blocks retinoid + acid conflicts in same routine/day
- Rejects unknown product IDs
- Enforces min_interval_hours rules
- Protects compromised skin barriers
- Prevents prohibited fields (dose, amount) in responses
- Validates all enum values and score ranges

All validation failures are logged and responses are rejected with HTTP 502.
This commit is contained in:
Piotr Oleszczyk 2026-03-06 10:16:47 +01:00
parent e3ed0dd3a3
commit 2a9391ad32
16 changed files with 2357 additions and 13 deletions

View file

@ -0,0 +1,17 @@
"""LLM response validators for safety and quality checks."""
from innercontext.validators.base import ValidationResult
from innercontext.validators.batch_validator import BatchValidator
from innercontext.validators.photo_validator import PhotoValidator
from innercontext.validators.product_parse_validator import ProductParseValidator
from innercontext.validators.routine_validator import RoutineSuggestionValidator
from innercontext.validators.shopping_validator import ShoppingValidator
# Public names re-exported by this package: the shared ValidationResult type
# plus the five validator classes imported above.
__all__ = [
"ValidationResult",
"RoutineSuggestionValidator",
"ShoppingValidator",
"ProductParseValidator",
"BatchValidator",
"PhotoValidator",
]

View file

@ -0,0 +1,52 @@
"""Base classes for LLM response validation."""
from dataclasses import dataclass, field
from typing import Any
@dataclass
class ValidationResult:
    """Outcome of validating a single LLM response.

    Messages are collected in three independent lists; only ``errors``
    influences :attr:`is_valid`.
    """

    # Critical problems; any entry means the response must be blocked.
    errors: list[str] = field(default_factory=list)
    # Non-critical issues that can be shown to users.
    warnings: list[str] = field(default_factory=list)
    # Descriptions of automatic fixes that were applied.
    auto_fixes: list[str] = field(default_factory=list)

    @property
    def is_valid(self) -> bool:
        """Return True when no critical error has been recorded."""
        return not self.errors

    def add_error(self, message: str) -> None:
        """Record a critical (blocking) error message."""
        self.errors.append(message)

    def add_warning(self, message: str) -> None:
        """Record a non-blocking warning message."""
        self.warnings.append(message)

    def add_fix(self, message: str) -> None:
        """Record the description of an automatic fix."""
        self.auto_fixes.append(message)
class BaseValidator:
    """Common interface shared by every LLM response validator."""

    def validate(self, response: Any, context: Any) -> ValidationResult:
        """Validate a parsed LLM response.

        This base implementation is a placeholder and always raises.

        Args:
            response: The parsed LLM response to validate.
            context: Additional context needed for validation.

        Returns:
            ValidationResult with any errors/warnings found.

        Raises:
            NotImplementedError: Always; subclasses provide the real check.
        """
        raise NotImplementedError("Subclasses must implement validate()")

View file

@ -0,0 +1,276 @@
"""Validator for batch routine suggestions (multi-day plans)."""
from collections import defaultdict
from dataclasses import dataclass
from datetime import date
from typing import Any
from uuid import UUID
from innercontext.validators.base import BaseValidator, ValidationResult
from innercontext.validators.routine_validator import (
RoutineSuggestionValidator,
RoutineValidationContext,
)
@dataclass
class BatchValidationContext:
"""Context needed to validate batch routine suggestions."""
valid_product_ids: set[UUID]
"""Set of product IDs that exist in the database."""
barrier_state: str | None
"""Current barrier state: 'intact', 'mildly_compromised', 'compromised'"""
products_by_id: dict[UUID, Any]
"""Map of product_id -> Product object for detailed checks."""
last_used_dates: dict[UUID, date]
"""Map of product_id -> last used date before batch period."""
class BatchValidator(BaseValidator):
    """Validates batch routine suggestions (multi-day AM+PM plans).

    Each AM/PM routine is delegated to ``RoutineSuggestionValidator``; on top
    of that this class checks same-day retinoid + acid conflicts and weekly
    frequency limits across the whole batch.
    """

    class _StepsOnly:
        """Minimal stand-in exposing only ``steps`` to the routine validator."""

        def __init__(self, steps: list) -> None:
            self.steps = steps

    def __init__(self) -> None:
        self.routine_validator = RoutineSuggestionValidator()

    def validate(
        self, response: Any, context: BatchValidationContext
    ) -> ValidationResult:
        """Validate a batch routine suggestion.

        Checks:
            1. All individual routines pass single-routine validation
            2. No retinoid + acid on same day (across AM/PM)
            3. Product frequency limits respected across the batch

        Min-interval rules are evaluated by the single-routine validator
        against ``context.last_used_dates`` only.

        Args:
            response: Parsed batch suggestion exposing a ``days`` list; each
                day exposes ``date`` and optionally ``am_steps``/``pm_steps``.
            context: Validation context.

        Returns:
            ValidationResult with any errors/warnings.
        """
        result = ValidationResult()

        if not hasattr(response, "days"):
            result.add_error("Response missing 'days' field")
            return result

        days = response.days
        if not isinstance(days, list):
            result.add_error("'days' must be a list")
            return result
        if not days:
            result.add_error("'days' cannot be empty")
            return result

        # product_id -> one entry per routine (AM or PM) that uses the product.
        product_usage_dates: dict[UUID, list[date]] = defaultdict(list)

        for day_num, day in enumerate(days, start=1):
            if not hasattr(day, "date"):
                result.add_error(f"Day {day_num}: missing 'date' field")
                continue

            day_date = day.date
            if isinstance(day_date, str):
                try:
                    day_date = date.fromisoformat(day_date)
                except ValueError:
                    result.add_error(
                        f"Day {day_num}: invalid date format '{day.date}'"
                    )
                    continue
            if not isinstance(day_date, date):
                # Anything else (e.g. None) would crash later when the usage
                # dates are grouped by week, so reject it here instead.
                result.add_error(
                    f"Day {day_num}: 'date' must be a date or ISO date string"
                )
                continue

            day_has_retinoid = False
            day_has_acid = False

            for part_of_day, label in (("am", "AM"), ("pm", "PM")):
                steps = getattr(day, f"{part_of_day}_steps", None)
                if not steps:
                    continue

                part_result = self._validate_single_routine(
                    steps,
                    day_date,
                    part_of_day,
                    context,
                    product_usage_dates,
                    f"Day {day_num} {label}",
                )
                result.errors.extend(part_result.errors)
                result.warnings.extend(part_result.warnings)

                _, has_retinoid, has_acid = self._get_routine_products(
                    steps, context
                )
                day_has_retinoid = day_has_retinoid or has_retinoid
                day_has_acid = day_has_acid or has_acid

            if day_has_retinoid and day_has_acid:
                result.add_error(
                    f"Day {day_num} ({day_date}): SAFETY - cannot use retinoid and acid "
                    "on the same day (across AM+PM)"
                )

        self._check_batch_frequency_limits(product_usage_dates, context, result)
        return result

    @staticmethod
    def _step_product_id(step: Any) -> Any:
        """Return the step's product id, or None if absent or unparseable.

        String ids are converted to ``UUID``; other truthy values are returned
        unchanged.
        """
        raw_id = getattr(step, "product_id", None)
        if not raw_id:
            return None
        if isinstance(raw_id, str):
            try:
                return UUID(raw_id)
            except ValueError:
                return None
        return raw_id

    def _validate_single_routine(
        self,
        steps: list,
        routine_date: date,
        part_of_day: str,
        context: BatchValidationContext,
        product_usage_dates: dict[UUID, list[date]],
        routine_label: str,
    ) -> ValidationResult:
        """Validate one AM or PM routine and record its product usage.

        Args:
            steps: Steps of the routine.
            routine_date: Date the routine is planned for.
            part_of_day: 'am' or 'pm'.
            context: Batch validation context.
            product_usage_dates: Mutated in place; ``routine_date`` is appended
                for every step that carries a usable product id.
            routine_label: Prefix added to every returned error/warning.

        Returns:
            The single-routine ValidationResult with prefixed messages.
        """
        routine_context = RoutineValidationContext(
            valid_product_ids=context.valid_product_ids,
            routine_date=routine_date,
            part_of_day=part_of_day,
            leaving_home=None,  # Not checked in batch mode
            barrier_state=context.barrier_state,
            products_by_id=context.products_by_id,
            last_used_dates=context.last_used_dates,
            just_shaved=False,  # Not checked in batch mode
        )

        result = self.routine_validator.validate(
            self._StepsOnly(steps), routine_context
        )

        for step in steps:
            product_id = self._step_product_id(step)
            if product_id is not None:
                product_usage_dates[product_id].append(routine_date)

        result.errors = [f"{routine_label}: {err}" for err in result.errors]
        result.warnings = [f"{routine_label}: {warn}" for warn in result.warnings]
        return result

    def _get_routine_products(
        self, steps: list, context: BatchValidationContext
    ) -> tuple[set[UUID], bool, bool]:
        """Collect the products of a routine and detect retinoids/acids.

        Products missing from ``context.products_by_id`` are still included in
        the returned id set but cannot contribute to the two flags.

        Returns:
            (product_ids, has_retinoid, has_acid)
        """
        products: set[UUID] = set()
        has_retinoid = False
        has_acid = False

        for step in steps:
            product_id = self._step_product_id(step)
            if product_id is None:
                continue
            products.add(product_id)

            product = context.products_by_id.get(product_id)
            if not product:
                continue
            if self.routine_validator._has_retinoid(product):
                has_retinoid = True
            if self.routine_validator._has_acid(product):
                has_acid = True

        return products, has_retinoid, has_acid

    def _check_batch_frequency_limits(
        self,
        product_usage_dates: dict[UUID, list[date]],
        context: BatchValidationContext,
        result: ValidationResult,
    ) -> None:
        """Check max_frequency_per_week limits across the batch.

        Usages are grouped by ISO (year, week). The ISO year is used rather
        than the calendar year so that a week straddling New Year is counted
        as one week instead of being split or merged with another week.
        """
        for product_id, usage_dates in product_usage_dates.items():
            product = context.products_by_id.get(product_id)
            if not product:
                continue

            max_per_week = getattr(product, "max_frequency_per_week", None)
            if not max_per_week:
                continue

            weekly_counts: dict[tuple[int, int], int] = defaultdict(int)
            for usage_date in usage_dates:
                iso = usage_date.isocalendar()
                weekly_counts[(iso[0], iso[1])] += 1

            for (year, week_num), count in weekly_counts.items():
                if count > max_per_week:
                    result.add_error(
                        f"Product {product.name}: used {count}x in week {week_num}/{year}, "
                        f"exceeds max_frequency_per_week={max_per_week}"
                    )

View file

@ -0,0 +1,178 @@
"""Validator for skin photo analysis responses."""
from typing import Any
from innercontext.validators.base import BaseValidator, ValidationResult
class PhotoValidator(BaseValidator):
    """Validates skin photo analysis LLM responses."""

    # Valid enum values (from photo analysis system prompt)
    VALID_OVERALL_STATE = {"excellent", "good", "fair", "poor"}
    VALID_SKIN_TYPE = {
        "dry",
        "oily",
        "combination",
        "sensitive",
        "normal",
        "acne_prone",
    }
    VALID_TEXTURE = {"smooth", "rough", "flaky", "bumpy"}
    VALID_BARRIER_STATE = {"intact", "mildly_compromised", "compromised"}
    VALID_ACTIVE_CONCERNS = {
        "acne",
        "rosacea",
        "hyperpigmentation",
        "aging",
        "dehydration",
        "redness",
        "damaged_barrier",
        "pore_visibility",
        "uneven_texture",
        "sebum_excess",
    }

    def validate(self, response: Any, context: Any = None) -> ValidationResult:
        """Validate a skin photo analysis response.

        Checks:
            1. Enum values match allowed strings
            2. Metrics are integers 1-5 (or omitted)
            3. Active concerns are from valid set
            4. Risks and priorities are short phrases (warning above 10 words)
            5. Notes field is reasonably sized (warning above 6 sentences)

        Args:
            response: Parsed photo analysis response.
            context: Not used for photo validation.

        Returns:
            ValidationResult with any errors/warnings.
        """
        result = ValidationResult()

        self._check_enum_field(
            response, "overall_state", self.VALID_OVERALL_STATE, result
        )
        self._check_enum_field(response, "skin_type", self.VALID_SKIN_TYPE, result)
        self._check_enum_field(response, "texture", self.VALID_TEXTURE, result)
        self._check_enum_field(
            response, "barrier_state", self.VALID_BARRIER_STATE, result
        )

        # Metric fields share a 1-5 scale.
        for metric_name in (
            "hydration_level",
            "sebum_tzone",
            "sebum_cheeks",
            "sensitivity_level",
        ):
            self._check_metric_field(response, metric_name, result)

        concerns = getattr(response, "active_concerns", None)
        if concerns:
            if not isinstance(concerns, list):
                result.add_error("active_concerns must be a list")
            else:
                for concern in concerns:
                    # The isinstance guard keeps unhashable values (e.g. a
                    # nested list) from raising TypeError in the set lookup.
                    if (
                        not isinstance(concern, str)
                        or concern not in self.VALID_ACTIVE_CONCERNS
                    ):
                        result.add_error(
                            f"Invalid active concern '{concern}' - must be one of: "
                            f"{', '.join(sorted(self.VALID_ACTIVE_CONCERNS))}"
                        )

        self._check_short_phrases(response, "risks", "Risk", result)
        self._check_short_phrases(response, "priorities", "Priority", result)

        notes = getattr(response, "notes", None)
        if notes:
            if not isinstance(notes, str):
                result.add_error("notes must be a string")
            else:
                # Rough sentence count: non-blank fragments between periods.
                sentence_count = len([s for s in notes.split(".") if s.strip()])
                if sentence_count > 6:
                    result.add_warning(
                        f"notes too long ({sentence_count} sentences) - "
                        "should be 2-4 sentences"
                    )

        return result

    def _check_short_phrases(
        self,
        obj: Any,
        field_name: str,
        label: str,
        result: ValidationResult,
    ) -> None:
        """Check that ``field_name`` holds a list of phrases of at most 10 words.

        A missing or empty field is accepted. Non-list values and non-string
        items are errors; over-long phrases only produce warnings.
        """
        phrases = getattr(obj, field_name, None)
        if not phrases:
            return
        if not isinstance(phrases, list):
            result.add_error(f"{field_name} must be a list")
            return

        for position, phrase in enumerate(phrases, start=1):
            if not isinstance(phrase, str):
                result.add_error(f"{label} {position}: must be a string")
                continue
            word_count = len(phrase.split())
            if word_count > 10:
                result.add_warning(
                    f"{label} {position}: too long ({word_count} words) - "
                    "should be max 10 words"
                )

    def _check_enum_field(
        self,
        obj: Any,
        field_name: str,
        valid_values: set[str],
        result: ValidationResult,
    ) -> None:
        """Check a single optional enum field; missing or None is accepted."""
        value = getattr(obj, field_name, None)
        if value is None:
            return  # Optional field

        # Non-string values can never match; checking the type first also
        # avoids a TypeError for unhashable values.
        if not isinstance(value, str) or value not in valid_values:
            result.add_error(
                f"Invalid {field_name} '{value}' - must be one of: "
                f"{', '.join(sorted(valid_values))}"
            )

    def _check_metric_field(
        self,
        obj: Any,
        field_name: str,
        result: ValidationResult,
    ) -> None:
        """Check an optional metric field is an integer in the range 1-5."""
        value = getattr(obj, field_name, None)
        if value is None:
            return  # Optional field

        # bool is a subclass of int, so it is rejected explicitly; otherwise
        # True would be accepted as a score of 1.
        if isinstance(value, bool) or not isinstance(value, int):
            result.add_error(
                f"{field_name} must be an integer, got {type(value).__name__}"
            )
            return

        if value < 1 or value > 5:
            result.add_error(f"{field_name} must be 1-5, got {value}")

View file

@ -0,0 +1,341 @@
"""Validator for product parsing responses."""
from typing import Any
from innercontext.validators.base import BaseValidator, ValidationResult
class ProductParseValidator(BaseValidator):
    """Validates product parsing LLM responses."""

    # Valid enum values (from product parsing system prompt)
    VALID_CATEGORIES = {
        "cleanser",
        "toner",
        "essence",
        "serum",
        "moisturizer",
        "spf",
        "mask",
        "exfoliant",
        "hair_treatment",
        "tool",
        "spot_treatment",
        "oil",
    }
    VALID_RECOMMENDED_TIME = {"am", "pm", "both"}
    VALID_TEXTURES = {
        "watery",
        "gel",
        "emulsion",
        "cream",
        "oil",
        "balm",
        "foam",
        "fluid",
    }
    VALID_ABSORPTION_SPEED = {"very_fast", "fast", "moderate", "slow", "very_slow"}
    VALID_SKIN_TYPES = {
        "dry",
        "oily",
        "combination",
        "sensitive",
        "normal",
        "acne_prone",
    }
    VALID_TARGETS = {
        "acne",
        "rosacea",
        "hyperpigmentation",
        "aging",
        "dehydration",
        "redness",
        "damaged_barrier",
        "pore_visibility",
        "uneven_texture",
        "hair_growth",
        "sebum_excess",
    }
    VALID_ACTIVE_FUNCTIONS = {
        "humectant",
        "emollient",
        "occlusive",
        "exfoliant_aha",
        "exfoliant_bha",
        "exfoliant_pha",
        "retinoid",
        "antioxidant",
        "soothing",
        "barrier_support",
        "brightening",
        "anti_acne",
        "ceramide",
        "niacinamide",
        "sunscreen",
        "peptide",
        "hair_growth_stimulant",
        "prebiotic",
        "vitamin_c",
        "anti_aging",
    }

    # Ordered so that messages come out in a stable order on every run.
    _EFFECT_PROFILE_FIELDS = (
        "hydration_immediate",
        "hydration_long_term",
        "barrier_repair_strength",
        "soothing_strength",
        "exfoliation_strength",
        "retinoid_strength",
        "irritation_risk",
        "comedogenic_risk",
        "barrier_disruption_risk",
        "dryness_risk",
        "brightening_strength",
        "anti_acne_strength",
        "anti_aging_strength",
    )

    def validate(self, response: Any, context: Any = None) -> ValidationResult:
        """Validate a product parsing response.

        Checks:
            1. Required fields present (name, category)
            2. Enum values match allowed strings
            3. effect_profile scores in range 0-5
            4. pH values reasonable (0-14)
            5. Actives have valid functions
            6. Strength/irritation levels in range 1-3
            7. Booleans are actual booleans

        Args:
            response: Parsed product data.
            context: Not used for product parse validation.

        Returns:
            ValidationResult with any errors/warnings.
        """
        result = ValidationResult()

        if not getattr(response, "name", None):
            result.add_error("Missing required field 'name'")

        category = getattr(response, "category", None)
        if not category:
            result.add_error("Missing required field 'category'")
        elif not self._is_allowed(category, self.VALID_CATEGORIES):
            result.add_error(
                f"Invalid category '{category}' - must be one of: "
                f"{', '.join(sorted(self.VALID_CATEGORIES))}"
            )

        self._check_enum_field(
            response, "recommended_time", self.VALID_RECOMMENDED_TIME, result
        )
        self._check_enum_field(response, "texture", self.VALID_TEXTURES, result)
        self._check_enum_field(
            response, "absorption_speed", self.VALID_ABSORPTION_SPEED, result
        )

        self._check_list_enum_field(
            response, "recommended_for", self.VALID_SKIN_TYPES, result
        )
        self._check_list_enum_field(response, "targets", self.VALID_TARGETS, result)

        profile = getattr(response, "product_effect_profile", None)
        if profile:
            self._check_effect_profile(profile, result)

        self._check_ph_values(response, result)

        actives = getattr(response, "actives", None)
        if actives:
            self._check_actives(actives, result)

        self._check_boolean_fields(response, result)
        return result

    @staticmethod
    def _is_allowed(value: Any, valid_values: set[str]) -> bool:
        """Return True if ``value`` is a string contained in ``valid_values``.

        The type check comes first so unhashable values (lists, dicts) are
        reported as invalid instead of raising TypeError in the set lookup.
        """
        return isinstance(value, str) and value in valid_values

    @staticmethod
    def _is_number(value: Any) -> bool:
        """Return True for int/float values, excluding bool."""
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    def _check_enum_field(
        self,
        obj: Any,
        field_name: str,
        valid_values: set[str],
        result: ValidationResult,
    ) -> None:
        """Check a single optional enum field; missing or None is accepted."""
        value = getattr(obj, field_name, None)
        if value is None:
            return  # Optional field

        if not self._is_allowed(value, valid_values):
            result.add_error(
                f"Invalid {field_name} '{value}' - must be one of: "
                f"{', '.join(sorted(valid_values))}"
            )

    def _check_list_enum_field(
        self,
        obj: Any,
        field_name: str,
        valid_values: set[str],
        result: ValidationResult,
    ) -> None:
        """Check an optional list whose items must all be allowed values."""
        value_list = getattr(obj, field_name, None)
        if value_list is None:
            return

        if not isinstance(value_list, list):
            result.add_error(f"{field_name} must be a list")
            return

        for value in value_list:
            if not self._is_allowed(value, valid_values):
                result.add_error(
                    f"Invalid {field_name} value '{value}' - must be one of: "
                    f"{', '.join(sorted(valid_values))}"
                )

    def _check_effect_profile(self, profile: Any, result: ValidationResult) -> None:
        """Check effect_profile scores are integers 0-5.

        A field that is absent only produces a warning; a field set to None is
        accepted silently.
        """
        for field_name in self._EFFECT_PROFILE_FIELDS:
            if not hasattr(profile, field_name):
                result.add_warning(
                    f"effect_profile missing field '{field_name}' - should include all 13 fields"
                )
                continue

            value = getattr(profile, field_name)
            if value is None:
                continue  # Optional to omit

            # bool is a subclass of int and must not pass as a score.
            if isinstance(value, bool) or not isinstance(value, int):
                result.add_error(
                    f"effect_profile.{field_name} must be an integer, got {type(value).__name__}"
                )
                continue

            if value < 0 or value > 5:
                result.add_error(
                    f"effect_profile.{field_name} must be 0-5, got {value}"
                )

    def _check_ph_values(self, obj: Any, result: ValidationResult) -> None:
        """Check ph_min/ph_max are numbers in 0-14 and that min <= max."""
        ph_min = getattr(obj, "ph_min", None)
        ph_max = getattr(obj, "ph_max", None)

        for label, value in (("ph_min", ph_min), ("ph_max", ph_max)):
            if value is None:
                continue
            if not self._is_number(value):
                result.add_error(
                    f"{label} must be a number, got {type(value).__name__}"
                )
            elif value < 0 or value > 14:
                result.add_error(f"{label} must be 0-14, got {value}")

        # Only compare when both are numeric: comparing e.g. a str with a
        # float would raise TypeError, and the type error is already reported.
        if self._is_number(ph_min) and self._is_number(ph_max) and ph_min > ph_max:
            result.add_error(
                f"ph_min ({ph_min}) cannot be greater than ph_max ({ph_max})"
            )

    def _check_actives(self, actives: list, result: ValidationResult) -> None:
        """Check each active's name, functions, levels and percent."""
        if not isinstance(actives, list):
            result.add_error("actives must be a list")
            return

        for active_num, active in enumerate(actives, start=1):
            if not getattr(active, "name", None):
                result.add_error(f"Active {active_num}: missing 'name'")

            functions = getattr(active, "functions", None)
            if functions:
                if not isinstance(functions, list):
                    result.add_error(
                        f"Active {active_num}: 'functions' must be a list"
                    )
                else:
                    for func in functions:
                        if not self._is_allowed(func, self.VALID_ACTIVE_FUNCTIONS):
                            result.add_error(
                                f"Active {active_num}: invalid function '{func}'"
                            )

            # strength_level and irritation_potential share the 1-3 scale.
            for level_name in ("strength_level", "irritation_potential"):
                level = getattr(active, level_name, None)
                if level is not None and level not in (1, 2, 3):
                    result.add_error(
                        f"Active {active_num}: {level_name} must be 1, 2, or 3, got {level}"
                    )

            percent = getattr(active, "percent", None)
            if percent is not None:
                if not self._is_number(percent):
                    result.add_error(
                        f"Active {active_num}: percent must be a number, got {type(percent).__name__}"
                    )
                elif percent < 0 or percent > 100:
                    result.add_error(
                        f"Active {active_num}: percent must be 0-100, got {percent}"
                    )

    def _check_boolean_fields(self, obj: Any, result: ValidationResult) -> None:
        """Check optional boolean fields are actual booleans (or None)."""
        for field_name in (
            "leave_on",
            "fragrance_free",
            "essential_oils_free",
            "alcohol_denat_free",
            "pregnancy_safe",
            "is_medication",
            "is_tool",
        ):
            value = getattr(obj, field_name, None)
            if value is not None and not isinstance(value, bool):
                result.add_error(
                    f"{field_name} must be a boolean (true/false), got {type(value).__name__}"
                )

View file

@ -0,0 +1,312 @@
"""Validator for routine suggestions (single day AM/PM)."""
from dataclasses import dataclass
from datetime import date
from typing import Any
from uuid import UUID
from innercontext.validators.base import BaseValidator, ValidationResult
@dataclass
class RoutineValidationContext:
"""Context needed to validate a routine suggestion."""
valid_product_ids: set[UUID]
"""Set of product IDs that exist in the database."""
routine_date: date
"""The date this routine is for."""
part_of_day: str
"""'am' or 'pm'"""
leaving_home: bool | None
"""Whether user is leaving home (for SPF check)."""
barrier_state: str | None
"""Current barrier state: 'intact', 'mildly_compromised', 'compromised'"""
products_by_id: dict[UUID, Any]
"""Map of product_id -> Product object for detailed checks."""
last_used_dates: dict[UUID, date]
"""Map of product_id -> last used date."""
just_shaved: bool = False
"""Whether user just shaved (affects context_rules check)."""
class RoutineSuggestionValidator(BaseValidator):
    """Validates routine suggestions for safety and correctness."""

    PROHIBITED_FIELDS = {"dose", "amount", "quantity", "pumps", "drops"}

    def validate(
        self, response: Any, context: RoutineValidationContext
    ) -> ValidationResult:
        """Validate a routine suggestion.

        Checks:
            1. All product_ids exist in database
            2. No retinoid + acid in same routine
            3. Respect min_interval_hours (day granularity)
            4. Verify context_rules (safe_after_shaving,
               safe_with_compromised_barrier)
            5. AM routines without an SPF product produce a warning
            6. No high barrier_disruption_risk with compromised barrier
            7. No prohibited fields (dose, etc.) in response
            8. Each step has either product_id or action_type (not both,
               not neither)

        max_frequency_per_week is not evaluated by this validator.

        Args:
            response: Parsed routine suggestion exposing ``steps``.
            context: Validation context with products and rules.

        Returns:
            ValidationResult with any errors/warnings.
        """
        result = ValidationResult()

        if not hasattr(response, "steps"):
            result.add_error("Response missing 'steps' field")
            return result

        steps = response.steps
        if not isinstance(steps, (list, tuple)):
            # e.g. None would otherwise raise TypeError in the loop below.
            result.add_error("'steps' must be a list")
            return result

        has_retinoid = False
        has_acid = False
        has_spf = False

        for step_num, step in enumerate(steps, start=1):
            self._check_prohibited_fields(step, step_num, result)

            has_product = getattr(step, "product_id", None) is not None
            has_action = getattr(step, "action_type", None) is not None

            if not has_product and not has_action:
                result.add_error(
                    f"Step {step_num}: must have either product_id or action_type"
                )
                continue
            if has_product and has_action:
                result.add_error(
                    f"Step {step_num}: cannot have both product_id and action_type"
                )
                continue
            if not has_product:
                continue  # Action-only step: nothing product-related to check

            product_id = step.product_id
            if isinstance(product_id, str):
                try:
                    product_id = UUID(product_id)
                except ValueError:
                    result.add_error(
                        f"Step {step_num}: invalid UUID format: {product_id}"
                    )
                    continue

            if product_id not in context.valid_product_ids:
                result.add_error(f"Step {step_num}: unknown product_id {product_id}")
                continue

            product = context.products_by_id.get(product_id)
            if not product:
                continue  # Can't do detailed checks without product data

            has_retinoid = has_retinoid or self._has_retinoid(product)
            has_acid = has_acid or self._has_acid(product)
            if getattr(product, "category", None) == "spf":
                has_spf = True

            self._check_interval_rules(step_num, product_id, product, context, result)
            self._check_context_rules(step_num, product, context, result)
            self._check_barrier_compatibility(step_num, product, context, result)

        if has_retinoid and has_acid:
            result.add_error(
                "SAFETY: Cannot combine retinoid and acid (AHA/BHA/PHA) in same routine"
            )

        if context.part_of_day == "am" and not has_spf:
            if context.leaving_home:
                result.add_warning(
                    "AM routine without SPF while leaving home - UV protection recommended"
                )
            else:
                # Still warn, with softer wording, when staying in or unknown.
                result.add_warning(
                    "AM routine without SPF - consider adding sun protection"
                )

        return result

    def _check_prohibited_fields(
        self, step: Any, step_num: int, result: ValidationResult
    ) -> None:
        """Report every prohibited attribute (e.g. 'dose') present on a step."""
        # Sorted so the error order is stable regardless of set ordering.
        for field_name in sorted(self.PROHIBITED_FIELDS):
            if hasattr(step, field_name):
                result.add_error(
                    f"Step {step_num}: prohibited field '{field_name}' in response - "
                    "doses/amounts should not be specified"
                )

    @staticmethod
    def _has_active_function(product: Any, wanted: set[str]) -> bool:
        """Return True if any active of ``product`` lists a wanted function."""
        for active in getattr(product, "actives", None) or []:
            functions = getattr(active, "functions", None) or []
            if any(name in functions for name in wanted):
                return True
        return False

    @staticmethod
    def _effect_score(product: Any, score_name: str) -> Any:
        """Return ``product.effect_profile.<score_name>``, or 0 when unset."""
        profile = getattr(product, "effect_profile", None)
        if not profile:
            return 0
        return getattr(profile, score_name, None) or 0

    def _has_retinoid(self, product: Any) -> bool:
        """Check if product contains retinoid.

        True when an active lists the 'retinoid' function or when
        effect_profile.retinoid_strength is above 0. The effect profile is
        consulted even when the product has no actives listed.
        """
        if self._has_active_function(product, {"retinoid"}):
            return True
        return self._effect_score(product, "retinoid_strength") > 0

    def _has_acid(self, product: Any) -> bool:
        """Check if product contains AHA/BHA/PHA.

        True when an active lists an exfoliant_aha/bha/pha function or when
        effect_profile.exfoliation_strength is above 0. The effect profile is
        consulted even when the product has no actives listed.
        """
        acid_functions = {"exfoliant_aha", "exfoliant_bha", "exfoliant_pha"}
        if self._has_active_function(product, acid_functions):
            return True
        return self._effect_score(product, "exfoliation_strength") > 0

    def _check_interval_rules(
        self,
        step_num: int,
        product_id: UUID,
        product: Any,
        context: RoutineValidationContext,
        result: ValidationResult,
    ) -> None:
        """Check min_interval_hours is respected.

        Only dates are available, so elapsed time is approximated as whole
        days * 24; any use on the routine date itself is always an error.
        """
        min_interval = getattr(product, "min_interval_hours", None)
        if not min_interval:
            return

        last_used = context.last_used_dates.get(product_id)
        if not last_used:
            return  # Never used, no violation

        if last_used == context.routine_date:
            result.add_error(
                f"Step {step_num}: product {product.name} already used today, "
                f"min_interval_hours={min_interval}"
            )
            return

        hours_since_use = (context.routine_date - last_used).days * 24
        if hours_since_use < min_interval:
            result.add_error(
                f"Step {step_num}: product {product.name} used too recently "
                f"(last used {last_used}, requires {min_interval}h interval)"
            )

    def _check_context_rules(
        self,
        step_num: int,
        product: Any,
        context: RoutineValidationContext,
        result: ValidationResult,
    ) -> None:
        """Check product context_rules are satisfied.

        A rule attribute that is present but falsy (including None) is treated
        as "not safe": a warning after shaving, an error for a mildly
        compromised or compromised barrier.
        """
        rules = getattr(product, "context_rules", None)
        if not rules:
            return

        if context.just_shaved and hasattr(rules, "safe_after_shaving"):
            if not rules.safe_after_shaving:
                result.add_warning(
                    f"Step {step_num}: {product.name} may irritate freshly shaved skin"
                )

        barrier_is_weak = context.barrier_state in ("mildly_compromised", "compromised")
        if barrier_is_weak and hasattr(rules, "safe_with_compromised_barrier"):
            if not rules.safe_with_compromised_barrier:
                result.add_error(
                    f"Step {step_num}: SAFETY - {product.name} not safe with "
                    f"{context.barrier_state} barrier"
                )

    def _check_barrier_compatibility(
        self,
        step_num: int,
        product: Any,
        context: RoutineValidationContext,
        result: ValidationResult,
    ) -> None:
        """Check product is safe for a compromised barrier.

        Runs only when barrier_state is exactly 'compromised'. A
        barrier_disruption_risk of 4 or more is an error; an irritation_risk
        of 4 or more is a warning.
        """
        if context.barrier_state != "compromised":
            return  # Only strict check for compromised barrier

        profile = getattr(product, "effect_profile", None)
        if not profile:
            return

        if hasattr(profile, "barrier_disruption_risk"):
            disruption = profile.barrier_disruption_risk or 0
            if disruption >= 4:  # High risk (4-5)
                result.add_error(
                    f"Step {step_num}: SAFETY - {product.name} has high barrier "
                    f"disruption risk ({disruption}/5) - not safe with compromised barrier"
                )

        if hasattr(profile, "irritation_risk"):
            irritation = profile.irritation_risk or 0
            if irritation >= 4:  # High risk
                result.add_warning(
                    f"Step {step_num}: {product.name} has high irritation risk ({irritation}/5) "
                    "- caution recommended with compromised barrier"
                )

View file

@ -0,0 +1,229 @@
"""Validator for shopping suggestions."""
from dataclasses import dataclass
from typing import Any
from uuid import UUID
from innercontext.validators.base import BaseValidator, ValidationResult
@dataclass
class ShoppingValidationContext:
    """Inputs required to validate shopping suggestions."""

    # Product IDs the user already owns (with inventory).
    owned_product_ids: set[UUID]
    # Valid product categories.
    valid_categories: set[str]
    # Valid skin concern targets.
    valid_targets: set[str]
class ShoppingValidator(BaseValidator):
    """Validates shopping suggestions for product types.

    Hard failures are reported through ``ValidationResult.add_error`` and
    softer "looks unusual" findings through ``ValidationResult.add_warning``.
    """

    # Realistic product type patterns (not exhaustive, just sanity checks).
    # Matched as case-insensitive substrings of the suggested product type.
    VALID_PRODUCT_TYPE_PATTERNS = {
        "serum",
        "cream",
        "cleanser",
        "toner",
        "essence",
        "moisturizer",
        "spf",
        "sunscreen",
        "oil",
        "balm",
        "mask",
        "exfoliant",
        "acid",
        "retinoid",
        "vitamin",
        "niacinamide",
        "hyaluronic",
        "ceramide",
        "peptide",
        "antioxidant",
        "aha",
        "bha",
        "pha",
    }

    VALID_FREQUENCIES = {
        "daily",
        "twice daily",
        "am",
        "pm",
        "both",
        "2x weekly",
        "3x weekly",
        "2-3x weekly",
        "weekly",
        "as needed",
        "occasional",
    }

    # Brand-name fragments that must not appear in a product *type*.
    # Matched as case-insensitive substrings.
    SUSPICIOUS_BRANDS = (
        "la roche",
        "cerave",
        "paula",
        "ordinary",
        "skinceuticals",
        "drunk elephant",
        "versed",
        "inkey",
        "cosrx",
        "pixi",
    )

    # Words that mark a frequency as recognizable wherever they occur
    # (e.g. "twice daily", "biweekly").
    FREQUENCY_KEYWORDS = ("daily", "weekly")

    # Stand-alone "am"/"pm" tokens, or a count marker such as "2x" / "1-2x".
    # Word boundaries matter: a bare substring test would accept "random",
    # "spam" or "maximum" and the unusual-frequency warning would never fire.
    FREQUENCY_TOKEN_RE = re.compile(r"\b(?:am|pm)\b|\d\s*x\b")

    def validate(
        self, response: Any, context: ShoppingValidationContext
    ) -> ValidationResult:
        """Validate shopping suggestions.

        Checks:
        1. ``response.suggestions`` is present and is a list
        2. Each suggestion has its required fields (non-null, non-blank)
        3. Category is one of ``context.valid_categories``
        4. Product type looks realistic and does not name a brand
        5. Frequency is a recognizable usage pattern
        6. Target concerns are a list drawn from ``context.valid_targets``
        7. ``recommended_time`` is ``'am'``, ``'pm'`` or ``'both'``

        Checks 3-7 apply only to fields that are present and truthy.
        ``context.owned_product_ids`` is not consulted by any check here.

        Args:
            response: Parsed shopping suggestion response
            context: Validation context

        Returns:
            ValidationResult with any errors/warnings
        """
        result = ValidationResult()

        if not hasattr(response, "suggestions"):
            result.add_error("Response missing 'suggestions' field")
            return result

        suggestions = response.suggestions
        if not isinstance(suggestions, list):
            result.add_error("'suggestions' must be a list")
            return result

        # Suggestions are numbered from 1 in every message.
        for sug_num, suggestion in enumerate(suggestions, start=1):
            self._check_required_fields(suggestion, sug_num, result)

            category = getattr(suggestion, "category", None)
            if category and category not in context.valid_categories:
                result.add_error(
                    f"Suggestion {sug_num}: invalid category '{category}'"
                )

            product_type = getattr(suggestion, "product_type", None)
            if product_type:
                self._check_product_type_realistic(product_type, sug_num, result)

            frequency = getattr(suggestion, "frequency", None)
            if frequency:
                self._check_frequency_valid(frequency, sug_num, result)

            target_concerns = getattr(suggestion, "target_concerns", None)
            if target_concerns:
                self._check_targets_valid(target_concerns, sug_num, context, result)

            recommended_time = getattr(suggestion, "recommended_time", None)
            if recommended_time and recommended_time not in ("am", "pm", "both"):
                result.add_error(
                    f"Suggestion {sug_num}: invalid recommended_time "
                    f"'{recommended_time}' (must be 'am', 'pm', or 'both')"
                )

        return result

    def _check_required_fields(
        self, suggestion: Any, sug_num: int, result: ValidationResult
    ) -> None:
        """Record an error for each required field that is absent or empty.

        A field counts as missing when the attribute does not exist, is
        ``None``, or is a blank / whitespace-only string. The blank case
        matters because ``validate`` skips falsy values, so an empty string
        would otherwise pass through unvalidated.
        """
        for name in ("category", "product_type", "why_needed"):
            value = getattr(suggestion, name, None)
            is_blank = isinstance(value, str) and not value.strip()
            if value is None or is_blank:
                result.add_error(
                    f"Suggestion {sug_num}: missing required field '{name}'"
                )

    def _check_product_type_realistic(
        self, product_type: str, sug_num: int, result: ValidationResult
    ) -> None:
        """Warn on unrecognized product types and reject brand names.

        A warning is added when no entry of ``VALID_PRODUCT_TYPE_PATTERNS``
        occurs in the lower-cased product type. An error is added when any
        entry of ``SUSPICIOUS_BRANDS`` occurs in it.
        """
        product_type_lower = product_type.lower()

        has_valid_keyword = any(
            pattern in product_type_lower
            for pattern in self.VALID_PRODUCT_TYPE_PATTERNS
        )
        if not has_valid_keyword:
            result.add_warning(
                f"Suggestion {sug_num}: product type '{product_type}' looks unusual - "
                "verify it's a real skincare product category"
            )

        # Check for brand names (shouldn't suggest specific brands)
        if any(brand in product_type_lower for brand in self.SUSPICIOUS_BRANDS):
            result.add_error(
                f"Suggestion {sug_num}: product type contains brand name - "
                "should suggest product TYPES only, not specific brands"
            )

    def _check_frequency_valid(
        self, frequency: str, sug_num: int, result: ValidationResult
    ) -> None:
        """Warn when a frequency is not a recognizable usage pattern.

        A frequency is accepted when, case-insensitively, it is an exact
        member of ``VALID_FREQUENCIES``, contains one of
        ``FREQUENCY_KEYWORDS``, or matches ``FREQUENCY_TOKEN_RE``.
        """
        frequency_lower = frequency.strip().lower()

        is_valid = (
            frequency_lower in self.VALID_FREQUENCIES
            or any(keyword in frequency_lower for keyword in self.FREQUENCY_KEYWORDS)
            or self.FREQUENCY_TOKEN_RE.search(frequency_lower) is not None
        )
        if not is_valid:
            result.add_warning(
                f"Suggestion {sug_num}: unusual frequency '{frequency}' - "
                "verify it's a realistic usage pattern"
            )

    def _check_targets_valid(
        self,
        target_concerns: list[str],
        sug_num: int,
        context: ShoppingValidationContext,
        result: ValidationResult,
    ) -> None:
        """Record an error for each target concern not in ``context.valid_targets``.

        If ``target_concerns`` is not a list, a single error is recorded and
        the individual entries are not inspected.
        """
        if not isinstance(target_concerns, list):
            result.add_error(f"Suggestion {sug_num}: target_concerns must be a list")
            return

        for target in target_concerns:
            if target not in context.valid_targets:
                result.add_error(
                    f"Suggestion {sug_num}: invalid target concern '{target}'"
                )