feat: AI pre-fill for product form via Gemini API

Add POST /products/parse-text endpoint that accepts raw product text,
calls Gemini (google-genai) with a structured extraction prompt, and
returns a partial ProductParseResponse. Frontend gains a collapsible
"AI pre-fill" card at the top of ProductForm that merges the LLM
response into all form fields reactively.

- Backend: ProductParseRequest/Response schemas, system prompt with
  enum constraints, temperature=0.0 for deterministic extraction,
  effect_profile always returned in full
- Frontend: parseProductText() in api.ts; controlled $state bindings
  for all text/number/checkbox inputs; applyAiResult() merges response

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Piotr Oleszczyk 2026-02-27 23:04:24 +01:00
parent c413e27768
commit 31e030eaac
5 changed files with 721 additions and 101 deletions

View file

@ -1,8 +1,13 @@
import json
import os
from datetime import date
from typing import Optional
from uuid import UUID, uuid4
from fastapi import APIRouter, Depends, Query
from fastapi import APIRouter, Depends, HTTPException, Query
from google import genai
from google.genai import types as genai_types
from pydantic import ValidationError
from sqlmodel import Session, SQLModel, select
from db import get_session
@ -95,6 +100,50 @@ class ProductUpdate(SQLModel):
personal_repurchase_intent: Optional[bool] = None
class ProductParseRequest(SQLModel):
    """Request body accepted by the ``/parse-text`` endpoint."""

    # Raw product text; it is embedded verbatim in the prompt sent to the LLM.
    text: str
class ProductParseResponse(SQLModel):
    """Partial product data extracted from free text.

    Every field is optional and defaults to ``None``, so a response may
    carry only the subset of attributes that could be extracted.
    """

    # --- Identity ---
    name: Optional[str] = None
    brand: Optional[str] = None
    line_name: Optional[str] = None
    sku: Optional[str] = None
    url: Optional[str] = None
    barcode: Optional[str] = None

    # --- Classification (enum-constrained) ---
    category: Optional[ProductCategory] = None
    recommended_time: Optional[DayTime] = None
    texture: Optional[TextureType] = None
    absorption_speed: Optional[AbsorptionSpeed] = None
    leave_on: Optional[bool] = None
    price_tier: Optional[PriceTier] = None

    # --- Quantities ---
    size_ml: Optional[float] = None
    full_weight_g: Optional[float] = None
    empty_weight_g: Optional[float] = None
    pao_months: Optional[int] = None

    # --- Ingredients and suitability ---
    inci: Optional[list[str]] = None
    actives: Optional[list[ActiveIngredient]] = None
    recommended_for: Optional[list[SkinType]] = None
    targets: Optional[list[SkinConcern]] = None
    contraindications: Optional[list[str]] = None
    usage_notes: Optional[str] = None

    # --- Formulation / safety flags ---
    fragrance_free: Optional[bool] = None
    essential_oils_free: Optional[bool] = None
    alcohol_denat_free: Optional[bool] = None
    pregnancy_safe: Optional[bool] = None

    # --- Effect profile and pH range ---
    product_effect_profile: Optional[ProductEffectProfile] = None
    ph_min: Optional[float] = None
    ph_max: Optional[float] = None

    # --- Interactions and usage constraints ---
    incompatible_with: Optional[list[ProductInteraction]] = None
    synergizes_with: Optional[list[str]] = None
    context_rules: Optional[ProductContext] = None
    min_interval_hours: Optional[int] = None
    max_frequency_per_week: Optional[int] = None

    # --- Product kind flags ---
    is_medication: Optional[bool] = None
    is_tool: Optional[bool] = None
    needle_length_mm: Optional[float] = None
class InventoryCreate(SQLModel):
is_opened: bool = False
opened_at: Optional[date] = None
@ -168,6 +217,164 @@ def create_product(data: ProductCreate, session: Session = Depends(get_session))
return product
def _product_parse_system_prompt() -> str:
return """\
You are a skincare and cosmetics product data extraction expert. \
Given raw text (product page copy, ingredient list, label scan, etc.), \
extract structured product data and return it as a single JSON object.
RULES:
- Return ONLY raw JSON no markdown code fences, no explanation, no preamble.
- Omit any field you cannot confidently determine from the text. Do not guess.
- All enum values must exactly match the allowed strings listed below.
- For INCI lists: return each ingredient as a separate string in the array, \
preserving standard INCI names exactly as they appear.
- For actives: extract name, concentration (numeric, 0100), functions \
(use the allowed strings), and strength/irritation level if inferable.
- For effect_profile scores (05 int): ALWAYS return the full product_effect_profile \
object with all 13 fields. Infer each score from ingredient activity and product claims. \
Use 0 only when you truly have no basis for an estimate.
- For pH: extract from explicit mention (e.g. "pH 5.5", "pH range 4.05.0"). \
Do not infer from ingredients alone.
- For context_rules: infer from usage instructions and ingredient interactions \
(e.g. "do not use with AHAs" safe_after_acids: false).
- fragrance_free / essential_oils_free / alcohol_denat_free: infer from INCI \
or explicit claims. Fragrance = "Parfum" or "Fragrance" in INCI fragrance_free: false.
- For leave_on: true = leave-on treatment, false = rinse-off (cleanser, mask to rinse).
- recommended_time: "am" if contains SPF or vitamin C; "pm" if retinoid/retinol; \
"both" otherwise (when unclear, use "both").
ENUM ALLOWED VALUES (use ONLY these exact strings):
category: "cleanser" | "toner" | "essence" | "serum" | "moisturizer" | "spf" | \
"mask" | "exfoliant" | "hair_treatment" | "tool" | "spot_treatment" | "oil"
recommended_time: "am" | "pm" | "both"
texture: "watery" | "gel" | "emulsion" | "cream" | "oil" | "balm" | "foam" | "fluid"
absorption_speed: "very_fast" | "fast" | "moderate" | "slow" | "very_slow"
price_tier: "budget" | "mid" | "premium" | "luxury"
recommended_for (array, pick applicable):
"dry" | "oily" | "combination" | "sensitive" | "normal" | "acne_prone"
targets (array, pick applicable):
"acne" | "rosacea" | "hyperpigmentation" | "aging" | "dehydration" | "redness" | \
"damaged_barrier" | "pore_visibility" | "uneven_texture" | "hair_growth" | "sebum_excess"
actives[].functions (array, pick applicable):
"humectant" | "emollient" | "occlusive" | "exfoliant_aha" | "exfoliant_bha" | \
"exfoliant_pha" | "retinoid" | "antioxidant" | "soothing" | "barrier_support" | \
"brightening" | "anti_acne" | "ceramide" | "niacinamide" | "sunscreen" | "peptide" | \
"hair_growth_stimulant" | "prebiotic" | "vitamin_c"
actives[].strength_level: 1 (low) | 2 (medium) | 3 (high)
actives[].irritation_potential: 1 (low) | 2 (medium) | 3 (high)
incompatible_with[].scope: "same_step" | "same_day" | "same_period"
OUTPUT SCHEMA (all fields optional omit what you cannot determine):
{
"name": string,
"brand": string,
"line_name": string,
"sku": string,
"url": string,
"barcode": string,
"category": string,
"recommended_time": string,
"texture": string,
"absorption_speed": string,
"leave_on": boolean,
"price_tier": string,
"size_ml": number,
"full_weight_g": number,
"empty_weight_g": number,
"pao_months": integer,
"inci": [string, ...],
"actives": [
{
"name": string,
"percent": number,
"functions": [string, ...],
"strength_level": 1|2|3,
"irritation_potential": 1|2|3
}
],
"recommended_for": [string, ...],
"targets": [string, ...],
"contraindications": [string, ...],
"usage_notes": string,
"fragrance_free": boolean,
"essential_oils_free": boolean,
"alcohol_denat_free": boolean,
"pregnancy_safe": boolean,
"product_effect_profile": {
"hydration_immediate": integer (0-5),
"hydration_long_term": integer (0-5),
"barrier_repair_strength": integer (0-5),
"soothing_strength": integer (0-5),
"exfoliation_strength": integer (0-5),
"retinoid_strength": integer (0-5),
"irritation_risk": integer (0-5),
"comedogenic_risk": integer (0-5),
"barrier_disruption_risk": integer (0-5),
"dryness_risk": integer (0-5),
"brightening_strength": integer (0-5),
"anti_acne_strength": integer (0-5),
"anti_aging_strength": integer (0-5)
},
"ph_min": number,
"ph_max": number,
"incompatible_with": [
{"target": string, "scope": string, "reason": string}
],
"synergizes_with": [string, ...],
"context_rules": {
"safe_after_shaving": boolean,
"safe_after_acids": boolean,
"safe_after_retinoids": boolean,
"safe_with_compromised_barrier": boolean,
"low_uv_only": boolean
},
"min_interval_hours": integer,
"max_frequency_per_week": integer,
"is_medication": boolean,
"is_tool": boolean,
"needle_length_mm": number
}
"""
@router.post("/parse-text", response_model=ProductParseResponse)
def parse_product_text(data: ProductParseRequest) -> ProductParseResponse:
    """Extract structured product data from raw text via the Gemini API.

    The model name is read from ``GEMINI_MODEL`` (default
    ``gemini-flash-latest``) and the key from ``GEMINI_API_KEY``. The request
    asks for a JSON response at temperature 0.0, then the returned JSON is
    validated against ``ProductParseResponse``.

    Args:
        data: Request body carrying the raw product text.

    Returns:
        The validated, possibly partial, product data.

    Raises:
        HTTPException: 503 if ``GEMINI_API_KEY`` is not set; 502 if the
            Gemini request fails, returns no text, or returns text that is
            not valid JSON; 422 if the JSON does not validate against
            ``ProductParseResponse``.
    """
    # Function-scope import: only this endpoint needs the SDK's error types.
    from google.genai import errors as genai_errors

    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise HTTPException(status_code=503, detail="GEMINI_API_KEY not configured")

    model = os.environ.get("GEMINI_MODEL", "gemini-flash-latest")
    client = genai.Client(api_key=api_key)

    # An upstream API failure is a gateway problem, not an internal server
    # bug, so report it as 502 instead of letting it surface as a raw 500.
    try:
        response = client.models.generate_content(
            model=model,
            contents=f"Extract product data from this text:\n\n{data.text}",
            config=genai_types.GenerateContentConfig(
                system_instruction=_product_parse_system_prompt(),
                response_mime_type="application/json",
                max_output_tokens=4096,
                temperature=0.0,
            ),
        )
    except genai_errors.APIError as e:
        raise HTTPException(
            status_code=502, detail=f"Gemini API request failed: {e}"
        ) from e

    # The response may carry no text (e.g. blocked or empty candidates);
    # handle that explicitly rather than passing None into json.loads.
    raw_text = response.text
    if not raw_text:
        raise HTTPException(status_code=502, detail="LLM returned an empty response")

    try:
        parsed = json.loads(raw_text)
    except json.JSONDecodeError as e:
        raise HTTPException(
            status_code=502, detail=f"LLM returned invalid JSON: {e}"
        ) from e

    try:
        return ProductParseResponse.model_validate(parsed)
    except ValidationError as e:
        raise HTTPException(status_code=422, detail=e.errors()) from e
@router.get("/{product_id}", response_model=ProductWithInventory)
def get_product(product_id: UUID, session: Session = Depends(get_session)):
product = get_or_404(session, Product, product_id)