From 4e014fa648d1a8c58f2da6fedf7b89f3fdc3b14f Mon Sep 17 00:00:00 2001
From: "khalid@traclabs.com"
Date: Thu, 23 Apr 2026 08:26:24 -0500
Subject: [PATCH] Switch to a smaller intent model

---
 .env.example             | 3 +++
 server/src/llm/client.ts | 6 ++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/.env.example b/.env.example
index 19b1fc6..c9674be 100644
--- a/.env.example
+++ b/.env.example
@@ -5,6 +5,9 @@ API_EDIT_SECRET=change-me-to-a-random-string
 OLLAMA_API_KEY=
 # For Ollama Cloud use https://ollama.com, for local Ollama use http://localhost:11434
 OLLAMA_HOST=https://ollama.com
+OLLAMA_MODEL=qwen3.5:397b-cloud
+OLLAMA_INTENT_MODEL=gemma4:31b-cloud
+OLLAMA_FALLBACK_MODEL=gpt-oss:120b
 
 # Paths
 REPO_ROOT=.
diff --git a/server/src/llm/client.ts b/server/src/llm/client.ts
index d162a18..25435f4 100644
--- a/server/src/llm/client.ts
+++ b/server/src/llm/client.ts
@@ -6,6 +6,7 @@ const OLLAMA_HOST = process.env.OLLAMA_HOST || 'http://localhost:11434';
 const OLLAMA_API_KEY = process.env.OLLAMA_API_KEY || '';
 const PRIMARY_MODEL = process.env.OLLAMA_MODEL || 'qwen3.5:397b-cloud';
 const FALLBACK_MODEL = process.env.OLLAMA_FALLBACK_MODEL || 'gpt-oss:120b';
+const INTENT_MODEL = process.env.OLLAMA_INTENT_MODEL || 'gemma4:31b-cloud';
 const MAX_RETRIES = 3;
 
 export interface LlmChatCaller {
@@ -39,9 +40,10 @@ async function generateWithValidation(params: {
   messages: Array<{ role: string; content: string }>;
   schema: z.ZodType;
   chat?: LlmChatCaller;
+  models?: string[];
 }): Promise {
   const chat = params.chat || ollamaChat;
-  const models = [PRIMARY_MODEL, FALLBACK_MODEL];
+  const models = params.models?.length ? params.models : [PRIMARY_MODEL, FALLBACK_MODEL];
 
   for (const model of models) {
     const msgs = [...params.messages];
@@ -193,7 +195,7 @@ Examples:
     },
   ];
 
-  return generateWithValidation({ messages, schema: classificationSchema, chat });
+  return generateWithValidation({ messages, schema: classificationSchema, chat, models: [INTENT_MODEL, FALLBACK_MODEL] });
 }
 
 // ── Info Response Generation ──
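
For reviewers, a minimal standalone sketch (not part of the patch) of the model-selection behavior this change introduces: callers may pass an explicit models list, and generateWithValidation falls back to [PRIMARY_MODEL, FALLBACK_MODEL] when none is given. The pickModels helper name below is hypothetical and exists only to illustrate the ternary added in the diff.

// Hypothetical illustration of the fallback logic in this patch.
// The three constants mirror those defined in server/src/llm/client.ts.
const PRIMARY_MODEL = process.env.OLLAMA_MODEL || 'qwen3.5:397b-cloud';
const FALLBACK_MODEL = process.env.OLLAMA_FALLBACK_MODEL || 'gpt-oss:120b';
const INTENT_MODEL = process.env.OLLAMA_INTENT_MODEL || 'gemma4:31b-cloud';

// Same expression as the diff: an explicit, non-empty list wins; otherwise use the defaults.
function pickModels(models?: string[]): string[] {
  return models?.length ? models : [PRIMARY_MODEL, FALLBACK_MODEL];
}

console.log(pickModels());                               // defaults: primary then fallback
console.log(pickModels([INTENT_MODEL, FALLBACK_MODEL])); // intent-classification path
console.log(pickModels([]));                             // empty list still falls back to defaults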