feat(api): add LLM response validation and input sanitization

Implement Phase 1: Safety & Validation for all LLM-based suggestion engines.

- Add input sanitization module to prevent prompt injection attacks
- Implement 5 comprehensive validators (routine, batch, shopping, product parse, photo)
- Add 10+ critical safety checks (retinoid+acid conflicts, barrier compatibility, etc.)
- Integrate validation into all 5 API endpoints (routines, products, skincare)
- Add validation fields to ai_call_logs table (validation_errors, validation_warnings, auto_fixed)
- Create database migration for validation fields
- Add comprehensive test suite (9/9 tests passing, 88% coverage on validators)

Safety improvements:

- Blocks retinoid + acid conflicts in same routine/day
- Rejects unknown product IDs
- Enforces min_interval_hours rules
- Protects compromised skin barriers
- Prevents prohibited fields (dose, amount) in responses
- Validates all enum values and score ranges

All validation failures are logged and responses are rejected with HTTP 502.
2026-03-06 10:16:47 +01:00 · 2026-03-06 10:16:47 +01:00 · 2a9391ad32
commit 2a9391ad32
parent e3ed0dd3a3
16 changed files with 2357 additions and 13 deletions
--- a/backend/innercontext/validators/__init__.py
+++ b/backend/innercontext/validators/__init__.py
@ -0,0 +1,17 @@
+"""LLM response validators for safety and quality checks."""
+
+from innercontext.validators.base import ValidationResult
+from innercontext.validators.batch_validator import BatchValidator
+from innercontext.validators.photo_validator import PhotoValidator
+from innercontext.validators.product_parse_validator import ProductParseValidator
+from innercontext.validators.routine_validator import RoutineSuggestionValidator
+from innercontext.validators.shopping_validator import ShoppingValidator
+
+__all__ = [
+    "ValidationResult",
+    "RoutineSuggestionValidator",
+    "ShoppingValidator",
+    "ProductParseValidator",
+    "BatchValidator",
+    "PhotoValidator",
+]
--- a/backend/innercontext/validators/base.py
+++ b/backend/innercontext/validators/base.py
@ -0,0 +1,52 @@
+"""Base classes for LLM response validation."""
+
+from dataclasses import dataclass, field
+from typing import Any
+
+
+@dataclass
+class ValidationResult:
+    """Findings collected while validating one LLM response.
+
+    Three independent lists are kept; only ``errors`` affects ``is_valid``.
+    """
+
+    # Critical errors that must block the response.
+    errors: list[str] = field(default_factory=list)
+
+    # Non-critical issues to show to users.
+    warnings: list[str] = field(default_factory=list)
+
+    # Description of automatic fixes applied.
+    auto_fixes: list[str] = field(default_factory=list)
+
+    @property
+    def is_valid(self) -> bool:
+        """Whether no error has been recorded."""
+        return not self.errors
+
+    def add_error(self, message: str) -> None:
+        """Record ``message`` as a critical error."""
+        self.errors.append(message)
+
+    def add_warning(self, message: str) -> None:
+        """Record ``message`` as a non-critical warning."""
+        self.warnings.append(message)
+
+    def add_fix(self, message: str) -> None:
+        """Record ``message`` as the description of an applied automatic fix."""
+        self.auto_fixes.append(message)
+
+
+class BaseValidator:
+    """Common interface shared by the LLM response validators."""
+
+    def validate(self, response: Any, context: Any) -> ValidationResult:
+        """
+        Validate an LLM response; this base implementation always raises.
+
+        Args:
+            response: The parsed LLM response to validate
+            context: Additional context needed for validation
+
+        Returns:
+            ValidationResult with any errors/warnings found
+
+        Raises:
+            NotImplementedError: Always, until a subclass overrides the method.
+        """
+        message = "Subclasses must implement validate()"
+        raise NotImplementedError(message)
--- a/backend/innercontext/validators/batch_validator.py
+++ b/backend/innercontext/validators/batch_validator.py
@ -0,0 +1,276 @@
+"""Validator for batch routine suggestions (multi-day plans)."""
+
+from collections import defaultdict
+from dataclasses import dataclass
+from datetime import date
+from typing import Any
+from uuid import UUID
+
+from innercontext.validators.base import BaseValidator, ValidationResult
+from innercontext.validators.routine_validator import (
+    RoutineSuggestionValidator,
+    RoutineValidationContext,
+)
+
+
+@dataclass
+class BatchValidationContext:
+    """Inputs the batch validator needs besides the LLM response itself."""
+
+    # Set of product IDs that exist in the database.
+    valid_product_ids: set[UUID]
+
+    # Current barrier state: 'intact', 'mildly_compromised', 'compromised'
+    barrier_state: str | None
+
+    # Map of product_id -> Product object for detailed checks.
+    products_by_id: dict[UUID, Any]
+
+    # Map of product_id -> last used date before batch period.
+    last_used_dates: dict[UUID, date]
+
+
+class BatchValidator(BaseValidator):
+    """Validates batch routine suggestions (multi-day AM+PM plans)."""
+
+    def __init__(self):
+        # Per-routine rules are delegated to the single-routine validator.
+        self.routine_validator = RoutineSuggestionValidator()
+
+    def validate(
+        self, response: Any, context: BatchValidationContext
+    ) -> ValidationResult:
+        """
+        Validate a batch routine suggestion.
+
+        Checks:
+        1. All individual routines pass single-routine validation
+        2. No retinoid + acid on same day (across AM/PM)
+        3. Product frequency limits respected across the batch
+
+        Days whose ``date`` is missing, unparsable, or not a date are reported
+        and skipped. Each routine is validated against
+        ``context.last_used_dates`` exactly as supplied; usage scheduled
+        earlier in the same batch is not fed back into that map.
+
+        Args:
+            response: Parsed batch suggestion with days
+            context: Validation context
+
+        Returns:
+            ValidationResult with any errors/warnings
+        """
+        result = ValidationResult()
+
+        if not hasattr(response, "days"):
+            result.add_error("Response missing 'days' field")
+            return result
+
+        days = response.days
+
+        if not isinstance(days, list):
+            result.add_error("'days' must be a list")
+            return result
+
+        if not days:
+            result.add_error("'days' cannot be empty")
+            return result
+
+        # Track product usage for frequency checks
+        product_usage_dates: dict[UUID, list[date]] = defaultdict(list)
+
+        for day_num, day in enumerate(days, start=1):
+            if not hasattr(day, "date"):
+                result.add_error(f"Day {day_num}: missing 'date' field")
+                continue
+
+            day_date = day.date
+            if isinstance(day_date, str):
+                try:
+                    day_date = date.fromisoformat(day_date)
+                except ValueError:
+                    result.add_error(f"Day {day_num}: invalid date format '{day.date}'")
+                    continue
+            elif not isinstance(day_date, date):
+                # Anything else (None, numbers, ...) would only fail later, in
+                # the weekly frequency check, with an unhandled exception.
+                result.add_error(f"Day {day_num}: invalid date format '{day.date}'")
+                continue
+
+            day_has_retinoid = False
+            day_has_acid = False
+
+            # AM and PM are handled identically; a missing or empty step list
+            # simply means that part of the day is not planned.
+            for part_of_day, steps_attr in (("am", "am_steps"), ("pm", "pm_steps")):
+                steps = getattr(day, steps_attr, None)
+                if not steps:
+                    continue
+
+                routine_result = self._validate_single_routine(
+                    steps,
+                    day_date,
+                    part_of_day,
+                    context,
+                    product_usage_dates,
+                    f"Day {day_num} {part_of_day.upper()}",
+                )
+                result.errors.extend(routine_result.errors)
+                result.warnings.extend(routine_result.warnings)
+                # Keep auto-fix notes too, so they are not silently lost.
+                result.auto_fixes.extend(routine_result.auto_fixes)
+
+                # Accumulate actives across AM and PM for the same-day check
+                _, has_retinoid, has_acid = self._get_routine_products(steps, context)
+                day_has_retinoid = day_has_retinoid or has_retinoid
+                day_has_acid = day_has_acid or has_acid
+
+            # Check same-day retinoid + acid conflict
+            if day_has_retinoid and day_has_acid:
+                result.add_error(
+                    f"Day {day_num} ({day_date}): SAFETY - cannot use retinoid and acid "
+                    "on the same day (across AM+PM)"
+                )
+
+        # Check frequency limits across the batch
+        self._check_batch_frequency_limits(product_usage_dates, context, result)
+
+        return result
+
+    @staticmethod
+    def _step_product_id(step: Any) -> Any:
+        """Return the step's product id (strings parsed to UUID), or None.
+
+        None is returned when the step has no product id or when a string id
+        is not a valid UUID.
+        """
+        product_id = getattr(step, "product_id", None)
+        if not product_id:
+            return None
+        if isinstance(product_id, str):
+            try:
+                return UUID(product_id)
+            except ValueError:
+                return None
+        return product_id
+
+    def _validate_single_routine(
+        self,
+        steps: list,
+        routine_date: date,
+        part_of_day: str,
+        context: BatchValidationContext,
+        product_usage_dates: dict[UUID, list[date]],
+        routine_label: str,
+    ) -> ValidationResult:
+        """
+        Validate a single routine within the batch.
+
+        Runs the single-routine validator on ``steps``, appends
+        ``routine_date`` to ``product_usage_dates`` for every step with a
+        usable product id, and prefixes every message with ``routine_label``.
+
+        Returns:
+            The routine validator's result with labelled messages
+        """
+        # Build context for single routine validation
+        routine_context = RoutineValidationContext(
+            valid_product_ids=context.valid_product_ids,
+            routine_date=routine_date,
+            part_of_day=part_of_day,
+            leaving_home=None,  # Not checked in batch mode
+            barrier_state=context.barrier_state,
+            products_by_id=context.products_by_id,
+            last_used_dates=context.last_used_dates,
+            just_shaved=False,  # Not checked in batch mode
+        )
+
+        # The routine validator reads `.steps`; wrap the bare list accordingly
+        class MockRoutine:
+            def __init__(self, steps):
+                self.steps = steps
+
+        result = self.routine_validator.validate(MockRoutine(steps), routine_context)
+
+        # Update product usage tracking
+        for step in steps:
+            product_id = self._step_product_id(step)
+            if product_id is not None:
+                product_usage_dates[product_id].append(routine_date)
+
+        # Prefix all messages with routine label
+        result.errors = [f"{routine_label}: {err}" for err in result.errors]
+        result.warnings = [f"{routine_label}: {warn}" for warn in result.warnings]
+        result.auto_fixes = [f"{routine_label}: {fix}" for fix in result.auto_fixes]
+
+        return result
+
+    def _get_routine_products(
+        self, steps: list, context: BatchValidationContext
+    ) -> tuple[set[UUID], bool, bool]:
+        """
+        Get products used in routine and check for retinoids/acids.
+
+        Product ids missing from ``context.products_by_id`` are still returned
+        in the id set but cannot contribute to the retinoid/acid flags.
+
+        Returns:
+            (product_ids, has_retinoid, has_acid)
+        """
+        products: set[UUID] = set()
+        has_retinoid = False
+        has_acid = False
+
+        for step in steps:
+            product_id = self._step_product_id(step)
+            if product_id is None:
+                continue
+
+            products.add(product_id)
+
+            product = context.products_by_id.get(product_id)
+            if not product:
+                continue
+
+            if self.routine_validator._has_retinoid(product):
+                has_retinoid = True
+            if self.routine_validator._has_acid(product):
+                has_acid = True
+
+        return products, has_retinoid, has_acid
+
+    def _check_batch_frequency_limits(
+        self,
+        product_usage_dates: dict[UUID, list[date]],
+        context: BatchValidationContext,
+        result: ValidationResult,
+    ) -> None:
+        """
+        Check max_frequency_per_week limits across the batch.
+
+        Every recorded use counts, so a product used both AM and PM on one day
+        counts twice. Products without a known, truthy
+        ``max_frequency_per_week`` are skipped.
+        """
+        for product_id, usage_dates in product_usage_dates.items():
+            product = context.products_by_id.get(product_id)
+            if not product:
+                continue
+
+            max_per_week = getattr(product, "max_frequency_per_week", None)
+            if not max_per_week:
+                continue
+
+            # Group by ISO (year, week). The ISO year must be used rather than
+            # the calendar year: around New Year they differ, and mixing them
+            # splits one week into two keys or merges weeks a year apart.
+            uses_per_week: dict[tuple[int, int], int] = defaultdict(int)
+            for usage_date in usage_dates:
+                iso_year, iso_week, _ = usage_date.isocalendar()
+                uses_per_week[(iso_year, iso_week)] += 1
+
+            # Check each week
+            for (year, week_num), count in uses_per_week.items():
+                if count > max_per_week:
+                    result.add_error(
+                        f"Product {product.name}: used {count}x in week {week_num}/{year}, "
+                        f"exceeds max_frequency_per_week={max_per_week}"
+                    )
--- a/backend/innercontext/validators/photo_validator.py
+++ b/backend/innercontext/validators/photo_validator.py
@ -0,0 +1,178 @@
+"""Validator for skin photo analysis responses."""
+
+from typing import Any
+
+from innercontext.validators.base import BaseValidator, ValidationResult
+
+
+class PhotoValidator(BaseValidator):
+    """Validates skin photo analysis LLM responses."""
+
+    # Valid enum values (from photo analysis system prompt)
+    VALID_OVERALL_STATE = {"excellent", "good", "fair", "poor"}
+
+    VALID_SKIN_TYPE = {
+        "dry",
+        "oily",
+        "combination",
+        "sensitive",
+        "normal",
+        "acne_prone",
+    }
+
+    VALID_TEXTURE = {"smooth", "rough", "flaky", "bumpy"}
+
+    VALID_BARRIER_STATE = {"intact", "mildly_compromised", "compromised"}
+
+    VALID_ACTIVE_CONCERNS = {
+        "acne",
+        "rosacea",
+        "hyperpigmentation",
+        "aging",
+        "dehydration",
+        "redness",
+        "damaged_barrier",
+        "pore_visibility",
+        "uneven_texture",
+        "sebum_excess",
+    }
+
+    def validate(self, response: Any, context: Any = None) -> ValidationResult:
+        """
+        Validate a skin photo analysis response.
+
+        Checks:
+        1. Enum values match allowed strings
+        2. Metrics are integers 1-5 (or omitted)
+        3. Active concerns are from valid set
+        4. Risks and priorities are reasonable (short phrases)
+        5. Notes field is reasonably sized
+
+        Args:
+            response: Parsed photo analysis response
+            context: Not used for photo validation
+
+        Returns:
+            ValidationResult with any errors/warnings
+        """
+        result = ValidationResult()
+
+        # Check enum fields
+        for field_name, allowed in (
+            ("overall_state", self.VALID_OVERALL_STATE),
+            ("skin_type", self.VALID_SKIN_TYPE),
+            ("texture", self.VALID_TEXTURE),
+            ("barrier_state", self.VALID_BARRIER_STATE),
+        ):
+            self._check_enum_field(response, field_name, allowed, result)
+
+        # Check metric fields (1-5 scale)
+        for field_name in (
+            "hydration_level",
+            "sebum_tzone",
+            "sebum_cheeks",
+            "sensitivity_level",
+        ):
+            self._check_metric_field(response, field_name, result)
+
+        # Check active_concerns list
+        concerns = getattr(response, "active_concerns", None)
+        if concerns:
+            if not isinstance(concerns, list):
+                result.add_error("active_concerns must be a list")
+            else:
+                for concern in concerns:
+                    if not self._is_allowed(concern, self.VALID_ACTIVE_CONCERNS):
+                        result.add_error(
+                            f"Invalid active concern '{concern}' - must be one of: "
+                            f"{', '.join(sorted(self.VALID_ACTIVE_CONCERNS))}"
+                        )
+
+        # Check risks and priorities lists (short phrases)
+        self._check_phrase_list(response, "risks", "Risk", result)
+        self._check_phrase_list(response, "priorities", "Priority", result)
+
+        # Check notes field
+        notes = getattr(response, "notes", None)
+        if notes:
+            if not isinstance(notes, str):
+                result.add_error("notes must be a string")
+            else:
+                # Rough sentence count: non-blank fragments between periods.
+                sentence_count = sum(1 for part in notes.split(".") if part.strip())
+                # NOTE(review): the warning fires only above 6 sentences although
+                # the message recommends 2-4 - confirm the slack is intended.
+                if sentence_count > 6:
+                    result.add_warning(
+                        f"notes too long ({sentence_count} sentences) - "
+                        "should be 2-4 sentences"
+                    )
+
+        return result
+
+    @staticmethod
+    def _is_allowed(value: Any, valid_values: set[str]) -> bool:
+        """Return True when ``value`` is a string contained in ``valid_values``.
+
+        The type test runs first so that an unhashable value (a list or dict)
+        is treated as invalid instead of raising TypeError in the set lookup.
+        """
+        return isinstance(value, str) and value in valid_values
+
+    def _check_phrase_list(
+        self,
+        obj: Any,
+        field_name: str,
+        item_label: str,
+        result: ValidationResult,
+    ) -> None:
+        """Check that an optional field is a list of strings of at most 10 words.
+
+        A non-list value or a non-string item is an error; an overlong phrase
+        is only a warning. ``item_label`` is the prefix used in item messages.
+        """
+        phrases = getattr(obj, field_name, None)
+        if not phrases:
+            return  # Optional field
+
+        if not isinstance(phrases, list):
+            result.add_error(f"{field_name} must be a list")
+            return
+
+        for position, phrase in enumerate(phrases, start=1):
+            if not isinstance(phrase, str):
+                result.add_error(f"{item_label} {position}: must be a string")
+                continue
+
+            word_count = len(phrase.split())
+            if word_count > 10:
+                result.add_warning(
+                    f"{item_label} {position}: too long ({word_count} words) - "
+                    "should be max 10 words"
+                )
+
+    def _check_enum_field(
+        self,
+        obj: Any,
+        field_name: str,
+        valid_values: set[str],
+        result: ValidationResult,
+    ) -> None:
+        """Check a single enum field; a missing or None value is accepted."""
+        value = getattr(obj, field_name, None)
+        if value is None:
+            return  # Optional field
+
+        if not self._is_allowed(value, valid_values):
+            result.add_error(
+                f"Invalid {field_name} '{value}' - must be one of: "
+                f"{', '.join(sorted(valid_values))}"
+            )
+
+    def _check_metric_field(
+        self,
+        obj: Any,
+        field_name: str,
+        result: ValidationResult,
+    ) -> None:
+        """Check a metric field is integer 1-5; missing or None is accepted."""
+        value = getattr(obj, field_name, None)
+        if value is None:
+            return  # Optional field
+
+        # bool is a subclass of int, so True would otherwise pass as metric 1.
+        if isinstance(value, bool) or not isinstance(value, int):
+            result.add_error(
+                f"{field_name} must be an integer, got {type(value).__name__}"
+            )
+            return
+
+        if not 1 <= value <= 5:
+            result.add_error(f"{field_name} must be 1-5, got {value}")
--- a/backend/innercontext/validators/product_parse_validator.py
+++ b/backend/innercontext/validators/product_parse_validator.py
@ -0,0 +1,341 @@
+"""Validator for product parsing responses."""
+
+from typing import Any
+
+from innercontext.validators.base import BaseValidator, ValidationResult
+
+
+class ProductParseValidator(BaseValidator):
+    """Validates product parsing LLM responses."""
+
+    # Valid enum values (from product parsing system prompt)
+    VALID_CATEGORIES = {
+        "cleanser",
+        "toner",
+        "essence",
+        "serum",
+        "moisturizer",
+        "spf",
+        "mask",
+        "exfoliant",
+        "hair_treatment",
+        "tool",
+        "spot_treatment",
+        "oil",
+    }
+
+    VALID_RECOMMENDED_TIME = {"am", "pm", "both"}
+
+    VALID_TEXTURES = {
+        "watery",
+        "gel",
+        "emulsion",
+        "cream",
+        "oil",
+        "balm",
+        "foam",
+        "fluid",
+    }
+
+    VALID_ABSORPTION_SPEED = {"very_fast", "fast", "moderate", "slow", "very_slow"}
+
+    VALID_SKIN_TYPES = {
+        "dry",
+        "oily",
+        "combination",
+        "sensitive",
+        "normal",
+        "acne_prone",
+    }
+
+    VALID_TARGETS = {
+        "acne",
+        "rosacea",
+        "hyperpigmentation",
+        "aging",
+        "dehydration",
+        "redness",
+        "damaged_barrier",
+        "pore_visibility",
+        "uneven_texture",
+        "hair_growth",
+        "sebum_excess",
+    }
+
+    VALID_ACTIVE_FUNCTIONS = {
+        "humectant",
+        "emollient",
+        "occlusive",
+        "exfoliant_aha",
+        "exfoliant_bha",
+        "exfoliant_pha",
+        "retinoid",
+        "antioxidant",
+        "soothing",
+        "barrier_support",
+        "brightening",
+        "anti_acne",
+        "ceramide",
+        "niacinamide",
+        "sunscreen",
+        "peptide",
+        "hair_growth_stimulant",
+        "prebiotic",
+        "vitamin_c",
+        "anti_aging",
+    }
+
+    # The 13 effect_profile scores, kept as a tuple so that messages are
+    # emitted in a stable order (set iteration order varies between runs).
+    EFFECT_PROFILE_FIELDS = (
+        "hydration_immediate",
+        "hydration_long_term",
+        "barrier_repair_strength",
+        "soothing_strength",
+        "exfoliation_strength",
+        "retinoid_strength",
+        "irritation_risk",
+        "comedogenic_risk",
+        "barrier_disruption_risk",
+        "dryness_risk",
+        "brightening_strength",
+        "anti_acne_strength",
+        "anti_aging_strength",
+    )
+
+    def validate(self, response: Any, context: Any = None) -> ValidationResult:
+        """
+        Validate a product parsing response.
+
+        Checks:
+        1. Required fields present (name, category)
+        2. Enum values match allowed strings
+        3. effect_profile scores in range 0-5
+        4. pH values reasonable (0-14)
+        5. Actives have valid functions
+        6. Strength/irritation levels in range 1-3
+        7. Booleans are actual booleans
+
+        Args:
+            response: Parsed product data
+            context: Not used for product parse validation
+
+        Returns:
+            ValidationResult with any errors/warnings
+        """
+        result = ValidationResult()
+
+        # Check required fields
+        if not getattr(response, "name", None):
+            result.add_error("Missing required field 'name'")
+
+        category = getattr(response, "category", None)
+        if not category:
+            result.add_error("Missing required field 'category'")
+        elif not self._is_allowed(category, self.VALID_CATEGORIES):
+            result.add_error(
+                f"Invalid category '{category}' - must be one of: "
+                f"{', '.join(sorted(self.VALID_CATEGORIES))}"
+            )
+
+        # Check enum fields
+        self._check_enum_field(
+            response, "recommended_time", self.VALID_RECOMMENDED_TIME, result
+        )
+        self._check_enum_field(response, "texture", self.VALID_TEXTURES, result)
+        self._check_enum_field(
+            response, "absorption_speed", self.VALID_ABSORPTION_SPEED, result
+        )
+
+        # Check list enum fields
+        self._check_list_enum_field(
+            response, "recommended_for", self.VALID_SKIN_TYPES, result
+        )
+        self._check_list_enum_field(response, "targets", self.VALID_TARGETS, result)
+
+        # Check effect_profile
+        effect_profile = getattr(response, "product_effect_profile", None)
+        if effect_profile:
+            self._check_effect_profile(effect_profile, result)
+
+        # Check pH ranges
+        self._check_ph_values(response, result)
+
+        # Check actives
+        actives = getattr(response, "actives", None)
+        if actives:
+            self._check_actives(actives, result)
+
+        # Check boolean fields
+        self._check_boolean_fields(response, result)
+
+        return result
+
+    @staticmethod
+    def _is_allowed(value: Any, valid_values: set[str]) -> bool:
+        """Return True when ``value`` is a string contained in ``valid_values``.
+
+        The type test runs first so that an unhashable value (a list or dict)
+        is treated as invalid instead of raising TypeError in the set lookup.
+        """
+        return isinstance(value, str) and value in valid_values
+
+    @staticmethod
+    def _is_number(value: Any) -> bool:
+        """Return True for int/float values, excluding bool (an int subclass)."""
+        return isinstance(value, (int, float)) and not isinstance(value, bool)
+
+    def _check_enum_field(
+        self,
+        obj: Any,
+        field_name: str,
+        valid_values: set[str],
+        result: ValidationResult,
+    ) -> None:
+        """Check a single enum field; a missing or None value is accepted."""
+        value = getattr(obj, field_name, None)
+        if value is None:
+            return  # Optional field
+
+        if not self._is_allowed(value, valid_values):
+            result.add_error(
+                f"Invalid {field_name} '{value}' - must be one of: "
+                f"{', '.join(sorted(valid_values))}"
+            )
+
+    def _check_list_enum_field(
+        self,
+        obj: Any,
+        field_name: str,
+        valid_values: set[str],
+        result: ValidationResult,
+    ) -> None:
+        """Check a list of enum values; a missing or None field is accepted."""
+        value_list = getattr(obj, field_name, None)
+        if value_list is None:
+            return
+
+        if not isinstance(value_list, list):
+            result.add_error(f"{field_name} must be a list")
+            return
+
+        for value in value_list:
+            if not self._is_allowed(value, valid_values):
+                result.add_error(
+                    f"Invalid {field_name} value '{value}' - must be one of: "
+                    f"{', '.join(sorted(valid_values))}"
+                )
+
+    def _check_effect_profile(self, profile: Any, result: ValidationResult) -> None:
+        """Check effect_profile has all 13 fields with scores 0-5.
+
+        A missing attribute is a warning, a None score is accepted, and a
+        non-integer or out-of-range score is an error.
+        """
+        for field_name in self.EFFECT_PROFILE_FIELDS:
+            if not hasattr(profile, field_name):
+                result.add_warning(
+                    f"effect_profile missing field '{field_name}' - should include all 13 fields"
+                )
+                continue
+
+            value = getattr(profile, field_name)
+            if value is None:
+                continue  # Optional to omit
+
+            # bool is a subclass of int; True/False are not meaningful scores.
+            if isinstance(value, bool) or not isinstance(value, int):
+                result.add_error(
+                    f"effect_profile.{field_name} must be an integer, got {type(value).__name__}"
+                )
+                continue
+
+            if not 0 <= value <= 5:
+                result.add_error(f"effect_profile.{field_name} must be 0-5, got {value}")
+
+    def _check_ph_values(self, obj: Any, result: ValidationResult) -> None:
+        """Check pH values are numbers in 0-14 and that ph_min <= ph_max.
+
+        The ordering check only runs when both bounds are numeric; comparing
+        a number with, say, a string would raise TypeError.
+        """
+        numeric_bounds: dict[str, Any] = {}
+
+        for bound_name in ("ph_min", "ph_max"):
+            value = getattr(obj, bound_name, None)
+            if value is None:
+                continue
+
+            if not self._is_number(value):
+                result.add_error(
+                    f"{bound_name} must be a number, got {type(value).__name__}"
+                )
+                continue
+
+            numeric_bounds[bound_name] = value
+            # Written as a positive range test so that NaN is rejected too.
+            if not 0 <= value <= 14:
+                result.add_error(f"{bound_name} must be 0-14, got {value}")
+
+        # Check min < max if both present
+        if len(numeric_bounds) == 2:
+            ph_min = numeric_bounds["ph_min"]
+            ph_max = numeric_bounds["ph_max"]
+            if ph_min > ph_max:
+                result.add_error(
+                    f"ph_min ({ph_min}) cannot be greater than ph_max ({ph_max})"
+                )
+
+    def _check_actives(self, actives: list, result: ValidationResult) -> None:
+        """Check actives list format: name, functions, levels and percent."""
+        if not isinstance(actives, list):
+            result.add_error("actives must be a list")
+            return
+
+        for active_num, active in enumerate(actives, start=1):
+            # Check name present
+            if not getattr(active, "name", None):
+                result.add_error(f"Active {active_num}: missing 'name'")
+
+            # Check functions are valid
+            functions = getattr(active, "functions", None)
+            if functions:
+                if not isinstance(functions, list):
+                    result.add_error(f"Active {active_num}: 'functions' must be a list")
+                else:
+                    for func in functions:
+                        if not self._is_allowed(func, self.VALID_ACTIVE_FUNCTIONS):
+                            result.add_error(
+                                f"Active {active_num}: invalid function '{func}'"
+                            )
+
+            # Check strength_level and irritation_potential (1-3)
+            for level_name in ("strength_level", "irritation_potential"):
+                level = getattr(active, level_name, None)
+                if level is None:
+                    continue
+                # True == 1 in Python, so bool must be excluded explicitly.
+                if isinstance(level, bool) or level not in (1, 2, 3):
+                    result.add_error(
+                        f"Active {active_num}: {level_name} must be 1, 2, or 3, got {level}"
+                    )
+
+            # Check percent is 0-100
+            percent = getattr(active, "percent", None)
+            if percent is not None:
+                if not self._is_number(percent):
+                    result.add_error(
+                        f"Active {active_num}: percent must be a number, got {type(percent).__name__}"
+                    )
+                elif not 0 <= percent <= 100:
+                    result.add_error(
+                        f"Active {active_num}: percent must be 0-100, got {percent}"
+                    )
+
+    def _check_boolean_fields(self, obj: Any, result: ValidationResult) -> None:
+        """Check boolean fields are actual booleans (missing or None is accepted)."""
+        boolean_fields = (
+            "leave_on",
+            "fragrance_free",
+            "essential_oils_free",
+            "alcohol_denat_free",
+            "pregnancy_safe",
+            "is_medication",
+            "is_tool",
+        )
+
+        for field_name in boolean_fields:
+            value = getattr(obj, field_name, None)
+            if value is not None and not isinstance(value, bool):
+                result.add_error(
+                    f"{field_name} must be a boolean (true/false), got {type(value).__name__}"
+                )
--- a/backend/innercontext/validators/routine_validator.py
+++ b/backend/innercontext/validators/routine_validator.py
@ -0,0 +1,312 @@
+"""Validator for routine suggestions (single day AM/PM)."""
+
+from dataclasses import dataclass
+from datetime import date
+from typing import Any
+from uuid import UUID
+
+from innercontext.validators.base import BaseValidator, ValidationResult
+
+
+@dataclass
+class RoutineValidationContext:
+    """Everything the routine validator needs besides the response itself.
+
+    Attributes:
+        valid_product_ids: Set of product IDs that exist in the database.
+        routine_date: The date this routine is for.
+        part_of_day: 'am' or 'pm'.
+        leaving_home: Whether user is leaving home (for SPF check).
+        barrier_state: Current barrier state: 'intact',
+            'mildly_compromised', 'compromised'.
+        products_by_id: Map of product_id -> Product object for detailed
+            checks.
+        last_used_dates: Map of product_id -> last used date.
+        just_shaved: Whether user just shaved (affects context_rules check).
+    """
+
+    # Field order is part of the positional constructor; keep it stable.
+    valid_product_ids: set[UUID]
+    routine_date: date
+    part_of_day: str
+    leaving_home: bool | None
+    barrier_state: str | None
+    products_by_id: dict[UUID, Any]
+    last_used_dates: dict[UUID, date]
+    just_shaved: bool = False
+
+
+class RoutineSuggestionValidator(BaseValidator):
+    """Validates routine suggestions for safety and correctness."""
+
+    PROHIBITED_FIELDS = {"dose", "amount", "quantity", "pumps", "drops"}
+
+    def validate(
+        self, response: Any, context: RoutineValidationContext
+    ) -> ValidationResult:
+        """
+        Validate a routine suggestion.
+
+        Checks performed:
+        1. No prohibited fields (dose, etc.) on any step (error)
+        2. Each step has either product_id or action_type, not both and
+           not neither (error)
+        3. Every product_id is a well-formed UUID present in
+           ``context.valid_product_ids`` (error)
+        4. No retinoid + acid in the same routine (error)
+        5. min_interval_hours is respected, at day granularity (error)
+        6. context_rules: safe_after_shaving (warning) and
+           safe_with_compromised_barrier (error)
+        7. With a compromised barrier: high barrier_disruption_risk (error)
+           and high irritation_risk (warning)
+        8. AM routine without an SPF product (warning only)
+
+        max_frequency_per_week is NOT checked here; the context carries no
+        usage history beyond the last-used date.
+
+        Args:
+            response: Parsed routine suggestion with steps
+            context: Validation context with products and rules
+
+        Returns:
+            ValidationResult with any errors/warnings
+        """
+        result = ValidationResult()
+
+        # A null 'steps' is treated like a missing one instead of letting
+        # the iteration below raise TypeError.
+        steps = getattr(response, "steps", None)
+        if steps is None:
+            result.add_error("Response missing 'steps' field")
+            return result
+
+        has_retinoid = False
+        has_acid = False
+        has_spf = False
+
+        for step_num, step in enumerate(steps, start=1):
+            # Check prohibited fields
+            self._check_prohibited_fields(step, step_num, result)
+
+            # Check step has either product_id or action_type
+            has_product = getattr(step, "product_id", None) is not None
+            has_action = getattr(step, "action_type", None) is not None
+
+            if not has_product and not has_action:
+                result.add_error(
+                    f"Step {step_num}: must have either product_id or action_type"
+                )
+                continue
+
+            if has_product and has_action:
+                result.add_error(
+                    f"Step {step_num}: cannot have both product_id and action_type"
+                )
+                continue
+
+            # Skip action-only steps for product validation
+            if not has_product:
+                continue
+
+            product_id = step.product_id
+
+            # Convert string UUID to UUID object if needed
+            if isinstance(product_id, str):
+                try:
+                    product_id = UUID(product_id)
+                except ValueError:
+                    result.add_error(
+                        f"Step {step_num}: invalid UUID format: {product_id}"
+                    )
+                    continue
+
+            # Check product exists
+            if product_id not in context.valid_product_ids:
+                result.add_error(f"Step {step_num}: unknown product_id {product_id}")
+                continue
+
+            product = context.products_by_id.get(product_id)
+            if not product:
+                continue  # Can't do detailed checks without product data
+
+            # Check for retinoids and acids
+            if self._has_retinoid(product):
+                has_retinoid = True
+            if self._has_acid(product):
+                has_acid = True
+
+            # Check for SPF; tolerate product objects without a category,
+            # like every other product attribute read by this validator.
+            if getattr(product, "category", None) == "spf":
+                has_spf = True
+
+            # Check interval rules
+            self._check_interval_rules(step_num, product_id, product, context, result)
+
+            # Check context rules
+            self._check_context_rules(step_num, product, context, result)
+
+            # Check barrier compatibility
+            self._check_barrier_compatibility(step_num, product, context, result)
+
+        # Check retinoid + acid conflict
+        if has_retinoid and has_acid:
+            result.add_error(
+                "SAFETY: Cannot combine retinoid and acid (AHA/BHA/PHA) in same routine"
+            )
+
+        # Check SPF for AM: always a warning, worded more strongly when the
+        # user is leaving home.
+        if context.part_of_day == "am" and not has_spf:
+            if context.leaving_home:
+                result.add_warning(
+                    "AM routine without SPF while leaving home - UV protection recommended"
+                )
+            else:
+                result.add_warning(
+                    "AM routine without SPF - consider adding sun protection"
+                )
+
+        return result
+
+    def _check_prohibited_fields(
+        self, step: Any, step_num: int, result: ValidationResult
+    ) -> None:
+        """Add an error for each prohibited attribute (e.g. 'dose') on the step."""
+        # Sorted so the error order does not depend on set hashing.
+        for field in sorted(self.PROHIBITED_FIELDS):
+            if hasattr(step, field):
+                result.add_error(
+                    f"Step {step_num}: prohibited field '{field}' in response - "
+                    "doses/amounts should not be specified"
+                )
+
+    def _has_retinoid(self, product: Any) -> bool:
+        """Return True if the product contains a retinoid.
+
+        A product counts as a retinoid when any active lists the 'retinoid'
+        function, or when its effect_profile has retinoid_strength > 0. The
+        effect_profile is consulted even when the product has no actives.
+        """
+        for active in getattr(product, "actives", None) or []:
+            if "retinoid" in (getattr(active, "functions", None) or []):
+                return True
+
+        # Also check effect_profile
+        profile = getattr(product, "effect_profile", None)
+        if profile and (getattr(profile, "retinoid_strength", None) or 0) > 0:
+            return True
+
+        return False
+
+    def _has_acid(self, product: Any) -> bool:
+        """Return True if the product contains AHA/BHA/PHA.
+
+        A product counts as an acid when any active lists an exfoliant
+        function, or when its effect_profile has exfoliation_strength > 0.
+        The effect_profile is consulted even when the product has no actives.
+        """
+        acid_functions = {"exfoliant_aha", "exfoliant_bha", "exfoliant_pha"}
+
+        for active in getattr(product, "actives", None) or []:
+            functions = getattr(active, "functions", None) or []
+            if any(f in functions for f in acid_functions):
+                return True
+
+        # Also check effect_profile
+        profile = getattr(product, "effect_profile", None)
+        if profile and (getattr(profile, "exfoliation_strength", None) or 0) > 0:
+            return True
+
+        return False
+
+    def _check_interval_rules(
+        self,
+        step_num: int,
+        product_id: UUID,
+        product: Any,
+        context: RoutineValidationContext,
+        result: ValidationResult,
+    ) -> None:
+        """Check min_interval_hours is respected.
+
+        Only dates are available, so the elapsed time is whole days * 24.
+        A product already used on the routine date is always rejected,
+        whatever its interval.
+        """
+        min_interval = getattr(product, "min_interval_hours", None)
+        if not min_interval:
+            return
+
+        last_used = context.last_used_dates.get(product_id)
+        if not last_used:
+            return  # Never used, no violation
+
+        # For same-day check, we need more granular time
+        # For now, just check if used same day
+        if last_used == context.routine_date:
+            result.add_error(
+                f"Step {step_num}: product {product.name} already used today, "
+                f"min_interval_hours={min_interval}"
+            )
+            return
+
+        hours_since_use = (context.routine_date - last_used).days * 24
+        if hours_since_use < min_interval:
+            result.add_error(
+                f"Step {step_num}: product {product.name} used too recently "
+                f"(last used {last_used}, requires {min_interval}h interval)"
+            )
+
+    def _check_context_rules(
+        self,
+        step_num: int,
+        product: Any,
+        context: RoutineValidationContext,
+        result: ValidationResult,
+    ) -> None:
+        """Check product context_rules are satisfied.
+
+        A falsy safe_after_shaving yields a warning when the user just
+        shaved; a falsy safe_with_compromised_barrier yields an error when
+        the barrier is mildly compromised or compromised.
+        """
+        rules = getattr(product, "context_rules", None)
+        if not rules:
+            return
+
+        # Check post-shaving safety
+        if context.just_shaved and hasattr(rules, "safe_after_shaving"):
+            if not rules.safe_after_shaving:
+                result.add_warning(
+                    f"Step {step_num}: {product.name} may irritate freshly shaved skin"
+                )
+
+        # Check barrier compatibility
+        if context.barrier_state in ("mildly_compromised", "compromised"):
+            if hasattr(rules, "safe_with_compromised_barrier"):
+                if not rules.safe_with_compromised_barrier:
+                    result.add_error(
+                        f"Step {step_num}: SAFETY - {product.name} not safe with "
+                        f"{context.barrier_state} barrier"
+                    )
+
+    def _check_barrier_compatibility(
+        self,
+        step_num: int,
+        product: Any,
+        context: RoutineValidationContext,
+        result: ValidationResult,
+    ) -> None:
+        """Check product is safe for current barrier state.
+
+        Only runs for a 'compromised' barrier: barrier_disruption_risk >= 4
+        is an error, irritation_risk >= 4 is a warning.
+        """
+        if context.barrier_state != "compromised":
+            return  # Only strict check for compromised barrier
+
+        profile = getattr(product, "effect_profile", None)
+        if not profile:
+            return
+
+        # Check barrier disruption risk
+        if hasattr(profile, "barrier_disruption_risk"):
+            risk = profile.barrier_disruption_risk or 0
+            if risk >= 4:  # High risk (4-5)
+                result.add_error(
+                    f"Step {step_num}: SAFETY - {product.name} has high barrier "
+                    f"disruption risk ({risk}/5) - not safe with compromised barrier"
+                )
+
+        # Check irritation risk
+        if hasattr(profile, "irritation_risk"):
+            risk = profile.irritation_risk or 0
+            if risk >= 4:  # High risk
+                result.add_warning(
+                    f"Step {step_num}: {product.name} has high irritation risk ({risk}/5) "
+                    "- caution recommended with compromised barrier"
+                )
--- a/backend/innercontext/validators/shopping_validator.py
+++ b/backend/innercontext/validators/shopping_validator.py
@ -0,0 +1,229 @@
+"""Validator for shopping suggestions."""
+
+from dataclasses import dataclass
+from typing import Any
+from uuid import UUID
+
+from innercontext.validators.base import BaseValidator, ValidationResult
+
+
+@dataclass
+class ShoppingValidationContext:
+    """Reference data used while validating shopping suggestions.
+
+    Attributes:
+        owned_product_ids: Product IDs user already owns (with inventory).
+        valid_categories: Valid product categories.
+        valid_targets: Valid skin concern targets.
+    """
+
+    # Field order is part of the positional constructor; keep it stable.
+    owned_product_ids: set[UUID]
+    valid_categories: set[str]
+    valid_targets: set[str]
+
+
+class ShoppingValidator(BaseValidator):
+    """Validates shopping suggestions for product types."""
+
+    # Realistic product type patterns (not exhaustive, just sanity checks)
+    VALID_PRODUCT_TYPE_PATTERNS = {
+        "serum",
+        "cream",
+        "cleanser",
+        "toner",
+        "essence",
+        "moisturizer",
+        "spf",
+        "sunscreen",
+        "oil",
+        "balm",
+        "mask",
+        "exfoliant",
+        "acid",
+        "retinoid",
+        "vitamin",
+        "niacinamide",
+        "hyaluronic",
+        "ceramide",
+        "peptide",
+        "antioxidant",
+        "aha",
+        "bha",
+        "pha",
+    }
+
+    VALID_FREQUENCIES = {
+        "daily",
+        "twice daily",
+        "am",
+        "pm",
+        "both",
+        "2x weekly",
+        "3x weekly",
+        "2-3x weekly",
+        "weekly",
+        "as needed",
+        "occasional",
+    }
+
+    # Brand fragments that must not appear in a product type (suggestions
+    # should name product TYPES only). Kept at class level so the list is
+    # built once rather than on every call.
+    SUSPICIOUS_BRANDS = (
+        "la roche",
+        "cerave",
+        "paula",
+        "ordinary",
+        "skinceuticals",
+        "drunk elephant",
+        "versed",
+        "inkey",
+        "cosrx",
+        "pixi",
+    )
+
+    def validate(
+        self, response: Any, context: ShoppingValidationContext
+    ) -> ValidationResult:
+        """
+        Validate shopping suggestions.
+
+        Checks:
+        1. suggestions field present and a list
+        2. Each suggestion has required fields
+        3. Categories are valid
+        4. Product types are realistic (contain known keywords) and do not
+           name a brand
+        5. Recommended frequencies are recognizable (warning only)
+        6. Targets are valid
+        7. recommended_time is 'am', 'pm' or 'both'
+
+        Suggestions are not compared against ``context.owned_product_ids``;
+        this validator does not read that field.
+
+        Args:
+            response: Parsed shopping suggestion response
+            context: Validation context
+
+        Returns:
+            ValidationResult with any errors/warnings
+        """
+        result = ValidationResult()
+
+        if not hasattr(response, "suggestions"):
+            result.add_error("Response missing 'suggestions' field")
+            return result
+
+        suggestions = response.suggestions
+
+        if not isinstance(suggestions, list):
+            result.add_error("'suggestions' must be a list")
+            return result
+
+        for sug_num, suggestion in enumerate(suggestions, start=1):
+            # Check required fields
+            self._check_required_fields(suggestion, sug_num, result)
+
+            # Check category is valid
+            category = getattr(suggestion, "category", None)
+            if category and category not in context.valid_categories:
+                result.add_error(
+                    f"Suggestion {sug_num}: invalid category '{category}'"
+                )
+
+            # Check product type is realistic
+            product_type = getattr(suggestion, "product_type", None)
+            if product_type:
+                self._check_product_type_realistic(product_type, sug_num, result)
+
+            # Check frequency is valid
+            frequency = getattr(suggestion, "frequency", None)
+            if frequency:
+                self._check_frequency_valid(frequency, sug_num, result)
+
+            # Check targets are valid
+            target_concerns = getattr(suggestion, "target_concerns", None)
+            if target_concerns:
+                self._check_targets_valid(target_concerns, sug_num, context, result)
+
+            # Check recommended_time is valid
+            recommended_time = getattr(suggestion, "recommended_time", None)
+            if recommended_time and recommended_time not in ("am", "pm", "both"):
+                result.add_error(
+                    f"Suggestion {sug_num}: invalid recommended_time "
+                    f"'{recommended_time}' (must be 'am', 'pm', or 'both')"
+                )
+
+        return result
+
+    def _check_required_fields(
+        self, suggestion: Any, sug_num: int, result: ValidationResult
+    ) -> None:
+        """Add an error for each required field that is absent or None."""
+        required = ["category", "product_type", "why_needed"]
+
+        for field in required:
+            if getattr(suggestion, field, None) is None:
+                result.add_error(
+                    f"Suggestion {sug_num}: missing required field '{field}'"
+                )
+
+    def _check_product_type_realistic(
+        self, product_type: str, sug_num: int, result: ValidationResult
+    ) -> None:
+        """Check product type contains realistic keywords and no brand name.
+
+        An unrecognized type is a warning; a brand fragment is an error.
+        A non-string value is an error rather than a crash.
+        """
+        if not isinstance(product_type, str):
+            # LLM output is untrusted: report a wrong type instead of
+            # raising AttributeError on .lower().
+            result.add_error(
+                f"Suggestion {sug_num}: product_type must be a string, "
+                f"got {type(product_type).__name__}"
+            )
+            return
+
+        product_type_lower = product_type.lower()
+
+        # Check if any valid pattern appears in the product type
+        has_valid_keyword = any(
+            pattern in product_type_lower
+            for pattern in self.VALID_PRODUCT_TYPE_PATTERNS
+        )
+
+        if not has_valid_keyword:
+            result.add_warning(
+                f"Suggestion {sug_num}: product type '{product_type}' looks unusual - "
+                "verify it's a real skincare product category"
+            )
+
+        # Check for brand names (shouldn't suggest specific brands)
+        if any(brand in product_type_lower for brand in self.SUSPICIOUS_BRANDS):
+            result.add_error(
+                f"Suggestion {sug_num}: product type contains brand name - "
+                "should suggest product TYPES only, not specific brands"
+            )
+
+    def _check_frequency_valid(
+        self, frequency: str, sug_num: int, result: ValidationResult
+    ) -> None:
+        """Check frequency is a recognized pattern (warning if not).
+
+        Accepted: an exact entry of VALID_FREQUENCIES, anything containing
+        'daily' or 'weekly', a standalone 'am'/'pm' word, or a count such
+        as '2x' / '2 x'. A non-string value is an error rather than a crash.
+        """
+        if not isinstance(frequency, str):
+            result.add_error(
+                f"Suggestion {sug_num}: frequency must be a string, "
+                f"got {type(frequency).__name__}"
+            )
+            return
+
+        frequency_lower = frequency.lower().strip()
+
+        # Split on anything non-alphanumeric so 'am'/'pm' and 'Nx' are
+        # matched as whole words. Bare substring tests would accept any
+        # text containing an 'x' or the letters 'am' (e.g. "same").
+        words = "".join(
+            ch if ch.isalnum() else " " for ch in frequency_lower
+        ).split()
+
+        has_time_of_day = any(word in ("am", "pm") for word in words)
+        has_times_count = any(
+            # "2x", "3x"
+            (word.endswith("x") and word[:-1].isdigit())
+            # "2 x"
+            or (word == "x" and idx > 0 and words[idx - 1].isdigit())
+            for idx, word in enumerate(words)
+        )
+
+        is_valid = (
+            frequency_lower in self.VALID_FREQUENCIES
+            or "daily" in frequency_lower
+            or "weekly" in frequency_lower
+            or has_time_of_day
+            or has_times_count
+        )
+
+        if not is_valid:
+            result.add_warning(
+                f"Suggestion {sug_num}: unusual frequency '{frequency}' - "
+                "verify it's a realistic usage pattern"
+            )
+
+    def _check_targets_valid(
+        self,
+        target_concerns: list[str],
+        sug_num: int,
+        context: ShoppingValidationContext,
+        result: ValidationResult,
+    ) -> None:
+        """Check every target concern is in ``context.valid_targets``."""
+        if not isinstance(target_concerns, list):
+            result.add_error(f"Suggestion {sug_num}: target_concerns must be a list")
+            return
+
+        for target in target_concerns:
+            if target not in context.valid_targets:
+                result.add_error(
+                    f"Suggestion {sug_num}: invalid target concern '{target}'"
+                )