From ba1f10d99f07696b361b623a282e005928bbe99a Mon Sep 17 00:00:00 2001 From: Piotr Oleszczyk Date: Tue, 3 Mar 2026 21:24:23 +0100 Subject: [PATCH] refactor(llm): optimize Gemini config profiles for extraction and creativity Introduces `get_extraction_config` and `get_creative_config` to standardize Gemini API calls. * Defines explicit config profiles with appropriate `temperature` and `thinking_level` for Gemini 3 Flash. * Extraction tasks use minimal thinking and temp=0.0 to reduce latency and token usage. * Creative tasks use low thinking, temp=0.4, and top_p=0.8 to balance naturalness and safety. * Applies these helpers across products, routines, and skincare endpoints. * Also updates default model to `gemini-3-flash-preview`. --- backend/innercontext/api/products.py | 11 ++---- backend/innercontext/api/routines.py | 11 ++---- backend/innercontext/api/skincare.py | 6 ++-- backend/innercontext/llm.py | 52 +++++++++++++++++++++------- backend/test_query.py | 25 +++++++++++++ 5 files changed, 72 insertions(+), 33 deletions(-) create mode 100644 backend/test_query.py diff --git a/backend/innercontext/api/products.py b/backend/innercontext/api/products.py index ac99681..32a8bad 100644 --- a/backend/innercontext/api/products.py +++ b/backend/innercontext/api/products.py @@ -4,14 +4,13 @@ from typing import Optional from uuid import UUID, uuid4 from fastapi import APIRouter, Depends, HTTPException, Query -from google.genai import types as genai_types from pydantic import BaseModel as PydanticBase from pydantic import ValidationError from sqlmodel import Session, SQLModel, col, select from db import get_session from innercontext.api.utils import get_or_404 -from innercontext.llm import call_gemini +from innercontext.llm import call_gemini, get_creative_config, get_extraction_config from innercontext.models import ( Product, ProductBase, @@ -422,12 +421,10 @@ def parse_product_text(data: ProductParseRequest) -> ProductParseResponse: response = call_gemini( endpoint="products/parse-text", contents=f"Extract product data from this text:\n\n{data.text}", - config=genai_types.GenerateContentConfig( + config=get_extraction_config( system_instruction=_product_parse_system_prompt(), - response_mime_type="application/json", response_schema=ProductParseLLMResponse, max_output_tokens=16384, - temperature=0.0, ), user_input=data.text, ) @@ -637,12 +634,10 @@ def suggest_shopping(session: Session = Depends(get_session)): response = call_gemini( endpoint="products/suggest", contents=prompt, - config=genai_types.GenerateContentConfig( + config=get_creative_config( system_instruction=_SHOPPING_SYSTEM_PROMPT, - response_mime_type="application/json", response_schema=_ShoppingSuggestionsOut, max_output_tokens=4096, - temperature=0.4, ), user_input=prompt, ) diff --git a/backend/innercontext/api/routines.py b/backend/innercontext/api/routines.py index a7faddc..13e47bc 100644 --- a/backend/innercontext/api/routines.py +++ b/backend/innercontext/api/routines.py @@ -4,13 +4,12 @@ from typing import Optional from uuid import UUID, uuid4 from fastapi import APIRouter, Depends, HTTPException -from google.genai import types as genai_types from pydantic import BaseModel as PydanticBase from sqlmodel import Session, SQLModel, col, select from db import get_session from innercontext.api.utils import get_or_404 -from innercontext.llm import call_gemini +from innercontext.llm import call_gemini, get_creative_config from innercontext.models import ( GroomingSchedule, Product, @@ -522,12 +521,10 @@ def suggest_routine( response = call_gemini( endpoint="routines/suggest", contents=prompt, - config=genai_types.GenerateContentConfig( + config=get_creative_config( system_instruction=_ROUTINES_SYSTEM_PROMPT, - response_mime_type="application/json", response_schema=_SuggestionOut, max_output_tokens=4096, - temperature=0.4, ), user_input=prompt, ) @@ -600,12 +597,10 @@ def suggest_batch( response = call_gemini( endpoint="routines/suggest-batch", contents=prompt, - config=genai_types.GenerateContentConfig( + config=get_creative_config( system_instruction=_ROUTINES_SYSTEM_PROMPT, - response_mime_type="application/json", response_schema=_BatchOut, max_output_tokens=8192, - temperature=0.4, ), user_input=prompt, ) diff --git a/backend/innercontext/api/skincare.py b/backend/innercontext/api/skincare.py index 31407db..8998e50 100644 --- a/backend/innercontext/api/skincare.py +++ b/backend/innercontext/api/skincare.py @@ -11,7 +11,7 @@ from sqlmodel import Session, SQLModel, select from db import get_session from innercontext.api.utils import get_or_404 -from innercontext.llm import call_gemini +from innercontext.llm import call_gemini, get_extraction_config from innercontext.models import ( SkinConditionSnapshot, SkinConditionSnapshotBase, @@ -171,12 +171,10 @@ async def analyze_skin_photos( response = call_gemini( endpoint="skincare/analyze-photos", contents=parts, - config=genai_types.GenerateContentConfig( + config=get_extraction_config( system_instruction=_skin_photo_system_prompt(), - response_mime_type="application/json", response_schema=_SkinAnalysisOut, max_output_tokens=2048, - temperature=0.0, ), user_input=image_summary, ) diff --git a/backend/innercontext/llm.py b/backend/innercontext/llm.py index ea78ecf..3381566 100644 --- a/backend/innercontext/llm.py +++ b/backend/innercontext/llm.py @@ -3,12 +3,50 @@ import os import time from contextlib import suppress +from typing import Any from fastapi import HTTPException from google import genai from google.genai import types as genai_types -_DEFAULT_MODEL = "gemini-flash-latest" +_DEFAULT_MODEL = "gemini-3-flash-preview" + + +def get_extraction_config( + system_instruction: str, + response_schema: Any, + max_output_tokens: int = 8192, +) -> genai_types.GenerateContentConfig: + """Config for strict data extraction (deterministic, minimal thinking).""" + return genai_types.GenerateContentConfig( + system_instruction=system_instruction, + response_mime_type="application/json", + response_schema=response_schema, + max_output_tokens=max_output_tokens, + temperature=0.0, + thinking_config=genai_types.ThinkingConfig( + thinking_level=genai_types.ThinkingLevel.MINIMAL + ), + ) + + +def get_creative_config( + system_instruction: str, + response_schema: Any, + max_output_tokens: int = 4096, +) -> genai_types.GenerateContentConfig: + """Config for creative tasks like recommendations (balanced creativity).""" + return genai_types.GenerateContentConfig( + system_instruction=system_instruction, + response_mime_type="application/json", + response_schema=response_schema, + max_output_tokens=max_output_tokens, + temperature=0.4, + top_p=0.8, + thinking_config=genai_types.ThinkingConfig( + thinking_level=genai_types.ThinkingLevel.LOW + ), + ) def get_gemini_client() -> tuple[genai.Client, str]: @@ -46,18 +84,6 @@ def call_gemini( with suppress(Exception): user_input = str(contents) - # Limit thinking by default — Gemini 3 Flash defaults to "high" thinking which - # consumes most of the token budget before generating actual output. - # Use "low" to reduce latency while keeping basic reasoning intact. - if config.thinking_config is None: - config = config.model_copy( - update={ - "thinking_config": genai_types.ThinkingConfig( - thinking_level=genai_types.ThinkingLevel.LOW - ) - } - ) - start = time.monotonic() success, error_detail, response, finish_reason = True, None, None, None try: diff --git a/backend/test_query.py b/backend/test_query.py new file mode 100644 index 0000000..46b5f20 --- /dev/null +++ b/backend/test_query.py @@ -0,0 +1,25 @@ +from datetime import date, timedelta + +from sqlmodel import select + +from db import get_session +from innercontext.models import Routine, RoutineStep + + +def run(): + session = next(get_session()) + ref_date = date.today() + cutoff = ref_date - timedelta(days=7) + + recent_usage = session.exec( + select(RoutineStep.product_id) + .join(Routine, Routine.id == RoutineStep.routine_id) + .where(Routine.routine_date >= cutoff) + .where(Routine.routine_date <= ref_date) + ).all() + + print("Found:", len(recent_usage)) + + +if __name__ == "__main__": + run()