feat: AI pre-fill for product form via Gemini API

Add POST /products/parse-text endpoint that accepts raw product text, calls Gemini (google-genai) with a structured extraction prompt, and returns a partial ProductParseResponse. The frontend gains a collapsible "AI pre-fill" card at the top of ProductForm that reactively merges the LLM response into all form fields.

- Backend: ProductParseRequest/Response schemas; system prompt with enum constraints; temperature=0.0 for deterministic extraction; product_effect_profile always returned in full
- Frontend: parseProductText() in api.ts; controlled $state bindings for all text/number/checkbox inputs; applyAiResult() merges the response into the form fields

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-27 23:04:24 +01:00 · 2026-02-27 23:04:24 +01:00 · 31e030eaac
commit 31e030eaac
parent c413e27768
5 changed files with 721 additions and 101 deletions
--- a/backend/innercontext/api/products.py
+++ b/backend/innercontext/api/products.py
@ -1,8 +1,13 @@
+import json
+import os
 from datetime import date
 from typing import Optional
 from uuid import UUID, uuid4

-from fastapi import APIRouter, Depends, Query
+from fastapi import APIRouter, Depends, HTTPException, Query
+from google import genai
+from google.genai import types as genai_types
+from pydantic import ValidationError
 from sqlmodel import Session, SQLModel, select

 from db import get_session
@ -95,6 +100,50 @@ class ProductUpdate(SQLModel):
    personal_repurchase_intent: Optional[bool] = None


+class ProductParseRequest(SQLModel):
+    """Request body for the ``/parse-text`` endpoint."""
+
+    # Raw product text; parse_product_text interpolates it unmodified into
+    # the prompt sent to the LLM.
+    text: str
+
+
+class ProductParseResponse(SQLModel):
+    """Partial product data extracted from free-form text by the LLM.
+
+    Every field is optional and defaults to ``None``: the extraction prompt
+    tells the model to omit anything it cannot determine, so a missing key
+    simply leaves the corresponding attribute unset.
+    """
+
+    # Free-text identification fields.
+    name: Optional[str] = None
+    brand: Optional[str] = None
+    line_name: Optional[str] = None
+    sku: Optional[str] = None
+    url: Optional[str] = None
+    barcode: Optional[str] = None
+    # Enum-typed fields: a value outside the enum fails model validation.
+    category: Optional[ProductCategory] = None
+    recommended_time: Optional[DayTime] = None
+    texture: Optional[TextureType] = None
+    absorption_speed: Optional[AbsorptionSpeed] = None
+    leave_on: Optional[bool] = None
+    price_tier: Optional[PriceTier] = None
+    # Physical attributes; units are presumably those named in the suffixes
+    # (ml, g, months) -- TODO confirm against the Product model.
+    size_ml: Optional[float] = None
+    full_weight_g: Optional[float] = None
+    empty_weight_g: Optional[float] = None
+    pao_months: Optional[int] = None
+    # Ingredient data and suitability.
+    inci: Optional[list[str]] = None
+    actives: Optional[list[ActiveIngredient]] = None
+    recommended_for: Optional[list[SkinType]] = None
+    targets: Optional[list[SkinConcern]] = None
+    contraindications: Optional[list[str]] = None
+    usage_notes: Optional[str] = None
+    fragrance_free: Optional[bool] = None
+    essential_oils_free: Optional[bool] = None
+    alcohol_denat_free: Optional[bool] = None
+    pregnancy_safe: Optional[bool] = None
+    # The prompt asks the model to always return this object in full.
+    product_effect_profile: Optional[ProductEffectProfile] = None
+    ph_min: Optional[float] = None
+    ph_max: Optional[float] = None
+    # Interaction and scheduling constraints.
+    incompatible_with: Optional[list[ProductInteraction]] = None
+    synergizes_with: Optional[list[str]] = None
+    context_rules: Optional[ProductContext] = None
+    min_interval_hours: Optional[int] = None
+    max_frequency_per_week: Optional[int] = None
+    is_medication: Optional[bool] = None
+    is_tool: Optional[bool] = None
+    needle_length_mm: Optional[float] = None
+
+
 class InventoryCreate(SQLModel):
    is_opened: bool = False
    opened_at: Optional[date] = None
@ -168,6 +217,164 @@ def create_product(data: ProductCreate, session: Session = Depends(get_session))
    return product


+def _product_parse_system_prompt() -> str:
+    """Return the system instruction used for LLM product-data extraction.
+
+    The text tells the model to answer with a single raw JSON object, lists
+    the exact strings allowed for each enum-like field, and spells out the
+    expected output schema.
+
+    NOTE(review): the allowed values and schema below are hand-written copies;
+    presumably they must be kept in sync with the enum and model definitions
+    that ProductParseResponse validates against -- verify when those change.
+    """
+    return """\
+You are a skincare and cosmetics product data extraction expert. \
+Given raw text (product page copy, ingredient list, label scan, etc.), \
+extract structured product data and return it as a single JSON object.
+
+RULES:
+- Return ONLY raw JSON — no markdown code fences, no explanation, no preamble.
+- Omit any field you cannot confidently determine from the text. Do not guess.
+- All enum values must exactly match the allowed strings listed below.
+- For INCI lists: return each ingredient as a separate string in the array, \
+preserving standard INCI names exactly as they appear.
+- For actives: extract name, concentration (numeric, 0–100), functions \
+(use the allowed strings), and strength/irritation level if inferable.
+- For effect_profile scores (0–5 int): ALWAYS return the full product_effect_profile \
+object with all 13 fields. Infer each score from ingredient activity and product claims. \
+Use 0 only when you truly have no basis for an estimate.
+- For pH: extract from explicit mention (e.g. "pH 5.5", "pH range 4.0–5.0"). \
+Do not infer from ingredients alone.
+- For context_rules: infer from usage instructions and ingredient interactions \
+(e.g. "do not use with AHAs" → safe_after_acids: false).
+- fragrance_free / essential_oils_free / alcohol_denat_free: infer from INCI \
+or explicit claims. Fragrance = "Parfum" or "Fragrance" in INCI → fragrance_free: false.
+- For leave_on: true = leave-on treatment, false = rinse-off (cleanser, mask to rinse).
+- recommended_time: "am" if contains SPF or vitamin C; "pm" if retinoid/retinol; \
+"both" otherwise (when unclear, use "both").
+
+ENUM ALLOWED VALUES (use ONLY these exact strings):
+
+category: "cleanser" | "toner" | "essence" | "serum" | "moisturizer" | "spf" | \
+"mask" | "exfoliant" | "hair_treatment" | "tool" | "spot_treatment" | "oil"
+
+recommended_time: "am" | "pm" | "both"
+
+texture: "watery" | "gel" | "emulsion" | "cream" | "oil" | "balm" | "foam" | "fluid"
+
+absorption_speed: "very_fast" | "fast" | "moderate" | "slow" | "very_slow"
+
+price_tier: "budget" | "mid" | "premium" | "luxury"
+
+recommended_for (array, pick applicable):
+"dry" | "oily" | "combination" | "sensitive" | "normal" | "acne_prone"
+
+targets (array, pick applicable):
+"acne" | "rosacea" | "hyperpigmentation" | "aging" | "dehydration" | "redness" | \
+"damaged_barrier" | "pore_visibility" | "uneven_texture" | "hair_growth" | "sebum_excess"
+
+actives[].functions (array, pick applicable):
+"humectant" | "emollient" | "occlusive" | "exfoliant_aha" | "exfoliant_bha" | \
+"exfoliant_pha" | "retinoid" | "antioxidant" | "soothing" | "barrier_support" | \
+"brightening" | "anti_acne" | "ceramide" | "niacinamide" | "sunscreen" | "peptide" | \
+"hair_growth_stimulant" | "prebiotic" | "vitamin_c"
+
+actives[].strength_level: 1 (low) | 2 (medium) | 3 (high)
+actives[].irritation_potential: 1 (low) | 2 (medium) | 3 (high)
+
+incompatible_with[].scope: "same_step" | "same_day" | "same_period"
+
+OUTPUT SCHEMA (all fields optional — omit what you cannot determine):
+{
+  "name": string,
+  "brand": string,
+  "line_name": string,
+  "sku": string,
+  "url": string,
+  "barcode": string,
+  "category": string,
+  "recommended_time": string,
+  "texture": string,
+  "absorption_speed": string,
+  "leave_on": boolean,
+  "price_tier": string,
+  "size_ml": number,
+  "full_weight_g": number,
+  "empty_weight_g": number,
+  "pao_months": integer,
+  "inci": [string, ...],
+  "actives": [
+    {
+      "name": string,
+      "percent": number,
+      "functions": [string, ...],
+      "strength_level": 1|2|3,
+      "irritation_potential": 1|2|3
+    }
+  ],
+  "recommended_for": [string, ...],
+  "targets": [string, ...],
+  "contraindications": [string, ...],
+  "usage_notes": string,
+  "fragrance_free": boolean,
+  "essential_oils_free": boolean,
+  "alcohol_denat_free": boolean,
+  "pregnancy_safe": boolean,
+  "product_effect_profile": {
+    "hydration_immediate": integer (0-5),
+    "hydration_long_term": integer (0-5),
+    "barrier_repair_strength": integer (0-5),
+    "soothing_strength": integer (0-5),
+    "exfoliation_strength": integer (0-5),
+    "retinoid_strength": integer (0-5),
+    "irritation_risk": integer (0-5),
+    "comedogenic_risk": integer (0-5),
+    "barrier_disruption_risk": integer (0-5),
+    "dryness_risk": integer (0-5),
+    "brightening_strength": integer (0-5),
+    "anti_acne_strength": integer (0-5),
+    "anti_aging_strength": integer (0-5)
+  },
+  "ph_min": number,
+  "ph_max": number,
+  "incompatible_with": [
+    {"target": string, "scope": string, "reason": string}
+  ],
+  "synergizes_with": [string, ...],
+  "context_rules": {
+    "safe_after_shaving": boolean,
+    "safe_after_acids": boolean,
+    "safe_after_retinoids": boolean,
+    "safe_with_compromised_barrier": boolean,
+    "low_uv_only": boolean
+  },
+  "min_interval_hours": integer,
+  "max_frequency_per_week": integer,
+  "is_medication": boolean,
+  "is_tool": boolean,
+  "needle_length_mm": number
+}
+"""
+
+
+@router.post("/parse-text", response_model=ProductParseResponse)
+def parse_product_text(data: ProductParseRequest) -> ProductParseResponse:
+    """Extract structured product fields from free-form text via Gemini.
+
+    Reads ``GEMINI_API_KEY`` (required) and ``GEMINI_MODEL`` (optional,
+    defaults to ``gemini-flash-latest``) from the environment on every call,
+    sends ``data.text`` to the model together with the extraction system
+    prompt, and validates the JSON reply against ``ProductParseResponse``.
+
+    Raises:
+        HTTPException: 503 when no API key is configured; 502 when the Gemini
+            API reports an error or its reply is missing / not valid JSON;
+            422 when the JSON does not validate against the response schema.
+    """
+    # Local import: only this handler needs the SDK's error types.
+    from google.genai import errors as genai_errors
+
+    api_key = os.environ.get("GEMINI_API_KEY")
+    if not api_key:
+        raise HTTPException(status_code=503, detail="GEMINI_API_KEY not configured")
+    model = os.environ.get("GEMINI_MODEL", "gemini-flash-latest")
+    client = genai.Client(api_key=api_key)
+    try:
+        response = client.models.generate_content(
+            model=model,
+            contents=f"Extract product data from this text:\n\n{data.text}",
+            config=genai_types.GenerateContentConfig(
+                system_instruction=_product_parse_system_prompt(),
+                response_mime_type="application/json",
+                max_output_tokens=4096,
+                temperature=0.0,
+            ),
+        )
+    except genai_errors.APIError as e:
+        # Report upstream failures as a bad-gateway error instead of letting
+        # them surface as an opaque 500.
+        raise HTTPException(
+            status_code=502, detail=f"Gemini API request failed: {e}"
+        ) from e
+    try:
+        parsed = json.loads(response.text)
+    except (TypeError, ValueError) as e:
+        # TypeError: the reply carried no text (None). ValueError covers
+        # json.JSONDecodeError (its subclass) for empty or malformed payloads.
+        raise HTTPException(
+            status_code=502, detail=f"LLM returned invalid JSON: {e}"
+        ) from e
+    try:
+        return ProductParseResponse.model_validate(parsed)
+    except ValidationError as e:
+        raise HTTPException(status_code=422, detail=e.errors()) from e
+
+
@router.get("/{product_id}", response_model=ProductWithInventory)
 def get_product(product_id: UUID, session: Session = Depends(get_session)):
    product = get_or_404(session, Product, product_id)