From ba1f10d99f07696b361b623a282e005928bbe99a Mon Sep 17 00:00:00 2001
From: Piotr Oleszczyk <piotr@oleszczyk.eu>
Date: Tue, 3 Mar 2026 21:24:23 +0100
Subject: [PATCH] refactor(llm): optimize Gemini config profiles for extraction
 and creativity

Introduces `get_extraction_config` and `get_creative_config` to standardize Gemini API calls.

* Defines explicit config profiles with appropriate `temperature` and `thinking_level` for Gemini 3 Flash.
* Extraction tasks use `thinking_level=MINIMAL` and `temperature=0.0` to reduce latency and token usage.
* Creative tasks use `thinking_level=LOW`, `temperature=0.4`, and `top_p=0.8` to balance naturalness and safety.
* Applies these helpers across products, routines, and skincare endpoints.
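* Also updates default model to `gemini-3-flash-preview`.
* Removes the fallback in `call_gemini` that injected `ThinkingLevel.LOW` when `config.thinking_config` was `None`; callers that build their own config must now set `thinking_config` themselves.
* Adds `backend/test_query.py`, a standalone script that prints how many routine-step rows belong to routines dated within the last 7 days.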
---
 backend/innercontext/api/products.py | 11 ++----
 backend/innercontext/api/routines.py | 11 ++----
 backend/innercontext/api/skincare.py |  6 ++--
 backend/innercontext/llm.py          | 52 +++++++++++++++++++++-------
 backend/test_query.py                | 25 +++++++++++++
 5 files changed, 72 insertions(+), 33 deletions(-)
 create mode 100644 backend/test_query.py

diff --git a/backend/innercontext/api/products.py b/backend/innercontext/api/products.py
index ac99681..32a8bad 100644
--- a/backend/innercontext/api/products.py
+++ b/backend/innercontext/api/products.py
@@ -4,14 +4,13 @@ from typing import Optional
 from uuid import UUID, uuid4
 
 from fastapi import APIRouter, Depends, HTTPException, Query
-from google.genai import types as genai_types
 from pydantic import BaseModel as PydanticBase
 from pydantic import ValidationError
 from sqlmodel import Session, SQLModel, col, select
 
 from db import get_session
 from innercontext.api.utils import get_or_404
-from innercontext.llm import call_gemini
+from innercontext.llm import call_gemini, get_creative_config, get_extraction_config
 from innercontext.models import (
     Product,
     ProductBase,
@@ -422,12 +421,10 @@ def parse_product_text(data: ProductParseRequest) -> ProductParseResponse:
     response = call_gemini(
         endpoint="products/parse-text",
         contents=f"Extract product data from this text:\n\n{data.text}",
-        config=genai_types.GenerateContentConfig(
+        config=get_extraction_config(
             system_instruction=_product_parse_system_prompt(),
-            response_mime_type="application/json",
             response_schema=ProductParseLLMResponse,
             max_output_tokens=16384,
-            temperature=0.0,
         ),
         user_input=data.text,
     )
@@ -637,12 +634,10 @@ def suggest_shopping(session: Session = Depends(get_session)):
     response = call_gemini(
         endpoint="products/suggest",
         contents=prompt,
-        config=genai_types.GenerateContentConfig(
+        config=get_creative_config(
             system_instruction=_SHOPPING_SYSTEM_PROMPT,
-            response_mime_type="application/json",
             response_schema=_ShoppingSuggestionsOut,
             max_output_tokens=4096,
-            temperature=0.4,
         ),
         user_input=prompt,
     )
diff --git a/backend/innercontext/api/routines.py b/backend/innercontext/api/routines.py
index a7faddc..13e47bc 100644
--- a/backend/innercontext/api/routines.py
+++ b/backend/innercontext/api/routines.py
@@ -4,13 +4,12 @@ from typing import Optional
 from uuid import UUID, uuid4
 
 from fastapi import APIRouter, Depends, HTTPException
-from google.genai import types as genai_types
 from pydantic import BaseModel as PydanticBase
 from sqlmodel import Session, SQLModel, col, select
 
 from db import get_session
 from innercontext.api.utils import get_or_404
-from innercontext.llm import call_gemini
+from innercontext.llm import call_gemini, get_creative_config
 from innercontext.models import (
     GroomingSchedule,
     Product,
@@ -522,12 +521,10 @@ def suggest_routine(
     response = call_gemini(
         endpoint="routines/suggest",
         contents=prompt,
-        config=genai_types.GenerateContentConfig(
+        config=get_creative_config(
             system_instruction=_ROUTINES_SYSTEM_PROMPT,
-            response_mime_type="application/json",
             response_schema=_SuggestionOut,
             max_output_tokens=4096,
-            temperature=0.4,
         ),
         user_input=prompt,
     )
@@ -600,12 +597,10 @@ def suggest_batch(
     response = call_gemini(
         endpoint="routines/suggest-batch",
         contents=prompt,
-        config=genai_types.GenerateContentConfig(
+        config=get_creative_config(
             system_instruction=_ROUTINES_SYSTEM_PROMPT,
-            response_mime_type="application/json",
             response_schema=_BatchOut,
             max_output_tokens=8192,
-            temperature=0.4,
         ),
         user_input=prompt,
     )
diff --git a/backend/innercontext/api/skincare.py b/backend/innercontext/api/skincare.py
index 31407db..8998e50 100644
--- a/backend/innercontext/api/skincare.py
+++ b/backend/innercontext/api/skincare.py
@@ -11,7 +11,7 @@ from sqlmodel import Session, SQLModel, select
 
 from db import get_session
 from innercontext.api.utils import get_or_404
-from innercontext.llm import call_gemini
+from innercontext.llm import call_gemini, get_extraction_config
 from innercontext.models import (
     SkinConditionSnapshot,
     SkinConditionSnapshotBase,
@@ -171,12 +171,10 @@ async def analyze_skin_photos(
     response = call_gemini(
         endpoint="skincare/analyze-photos",
         contents=parts,
-        config=genai_types.GenerateContentConfig(
+        config=get_extraction_config(
             system_instruction=_skin_photo_system_prompt(),
-            response_mime_type="application/json",
             response_schema=_SkinAnalysisOut,
             max_output_tokens=2048,
-            temperature=0.0,
         ),
         user_input=image_summary,
     )
diff --git a/backend/innercontext/llm.py b/backend/innercontext/llm.py
index ea78ecf..3381566 100644
--- a/backend/innercontext/llm.py
+++ b/backend/innercontext/llm.py
@@ -3,12 +3,50 @@
 import os
 import time
 from contextlib import suppress
+from typing import Any
 
 from fastapi import HTTPException
 from google import genai
 from google.genai import types as genai_types
 
-_DEFAULT_MODEL = "gemini-flash-latest"
+_DEFAULT_MODEL = "gemini-3-flash-preview"
+
+
+def get_extraction_config(
+    system_instruction: str,
+    response_schema: Any,
+    max_output_tokens: int = 8192,
+) -> genai_types.GenerateContentConfig:
+    """Config for strict data extraction (deterministic, minimal thinking)."""
+    return genai_types.GenerateContentConfig(
+        system_instruction=system_instruction,
+        response_mime_type="application/json",
+        response_schema=response_schema,
+        max_output_tokens=max_output_tokens,
+        temperature=0.0,
+        thinking_config=genai_types.ThinkingConfig(
+            thinking_level=genai_types.ThinkingLevel.MINIMAL
+        ),
+    )
+
+
+def get_creative_config(
+    system_instruction: str,
+    response_schema: Any,
+    max_output_tokens: int = 4096,
+) -> genai_types.GenerateContentConfig:
+    """Config for creative tasks like recommendations (balanced creativity)."""
+    return genai_types.GenerateContentConfig(
+        system_instruction=system_instruction,
+        response_mime_type="application/json",
+        response_schema=response_schema,
+        max_output_tokens=max_output_tokens,
+        temperature=0.4,
+        top_p=0.8,
+        thinking_config=genai_types.ThinkingConfig(
+            thinking_level=genai_types.ThinkingLevel.LOW
+        ),
+    )
 
 
 def get_gemini_client() -> tuple[genai.Client, str]:
@@ -46,18 +84,6 @@ def call_gemini(
         with suppress(Exception):
             user_input = str(contents)
 
-    # Limit thinking by default — Gemini 3 Flash defaults to "high" thinking which
-    # consumes most of the token budget before generating actual output.
-    # Use "low" to reduce latency while keeping basic reasoning intact.
-    if config.thinking_config is None:
-        config = config.model_copy(
-            update={
-                "thinking_config": genai_types.ThinkingConfig(
-                    thinking_level=genai_types.ThinkingLevel.LOW
-                )
-            }
-        )
-
     start = time.monotonic()
     success, error_detail, response, finish_reason = True, None, None, None
     try:
diff --git a/backend/test_query.py b/backend/test_query.py
new file mode 100644
index 0000000..46b5f20
--- /dev/null
+++ b/backend/test_query.py
@@ -0,0 +1,25 @@
+from datetime import date, timedelta
+
+from sqlmodel import select
+
+from db import get_session
+from innercontext.models import Routine, RoutineStep
+
+
+def run():
+    session = next(get_session())
+    ref_date = date.today()
+    cutoff = ref_date - timedelta(days=7)
+
+    recent_usage = session.exec(
+        select(RoutineStep.product_id)
+        .join(Routine, Routine.id == RoutineStep.routine_id)
+        .where(Routine.routine_date >= cutoff)
+        .where(Routine.routine_date <= ref_date)
+    ).all()
+
+    print("Found:", len(recent_usage))
+
+
+if __name__ == "__main__":
+    run()