Switch to a smaller intent model
This commit is contained in:
@@ -5,6 +5,9 @@ API_EDIT_SECRET=change-me-to-a-random-string
|
|||||||
OLLAMA_API_KEY=
|
OLLAMA_API_KEY=
|
||||||
# For Ollama Cloud use https://ollama.com, for local Ollama use http://localhost:11434
|
# For Ollama Cloud use https://ollama.com, for local Ollama use http://localhost:11434
|
||||||
OLLAMA_HOST=https://ollama.com
|
OLLAMA_HOST=https://ollama.com
|
||||||
|
OLLAMA_MODEL=qwen3.5:397b-cloud
|
||||||
|
OLLAMA_INTENT_MODEL=gemma4:31b-cloud
|
||||||
|
OLLAMA_FALLBACK_MODEL=gpt-oss:120b
|
||||||
|
|
||||||
# Paths
|
# Paths
|
||||||
REPO_ROOT=.
|
REPO_ROOT=.
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ const OLLAMA_HOST = process.env.OLLAMA_HOST || 'http://localhost:11434';
|
|||||||
const OLLAMA_API_KEY = process.env.OLLAMA_API_KEY || '';
|
const OLLAMA_API_KEY = process.env.OLLAMA_API_KEY || '';
|
||||||
const PRIMARY_MODEL = process.env.OLLAMA_MODEL || 'qwen3.5:397b-cloud';
|
const PRIMARY_MODEL = process.env.OLLAMA_MODEL || 'qwen3.5:397b-cloud';
|
||||||
const FALLBACK_MODEL = process.env.OLLAMA_FALLBACK_MODEL || 'gpt-oss:120b';
|
const FALLBACK_MODEL = process.env.OLLAMA_FALLBACK_MODEL || 'gpt-oss:120b';
|
||||||
|
const INTENT_MODEL = process.env.OLLAMA_INTENT_MODEL || 'gemma4:31b-cloud';
|
||||||
const MAX_RETRIES = 3;
|
const MAX_RETRIES = 3;
|
||||||
|
|
||||||
export interface LlmChatCaller {
|
export interface LlmChatCaller {
|
||||||
@@ -39,9 +40,10 @@ async function generateWithValidation<T>(params: {
|
|||||||
messages: Array<{ role: string; content: string }>;
|
messages: Array<{ role: string; content: string }>;
|
||||||
schema: z.ZodType<T>;
|
schema: z.ZodType<T>;
|
||||||
chat?: LlmChatCaller;
|
chat?: LlmChatCaller;
|
||||||
|
models?: string[];
|
||||||
}): Promise<T> {
|
}): Promise<T> {
|
||||||
const chat = params.chat || ollamaChat;
|
const chat = params.chat || ollamaChat;
|
||||||
const models = [PRIMARY_MODEL, FALLBACK_MODEL];
|
const models = params.models?.length ? params.models : [PRIMARY_MODEL, FALLBACK_MODEL];
|
||||||
|
|
||||||
for (const model of models) {
|
for (const model of models) {
|
||||||
const msgs = [...params.messages];
|
const msgs = [...params.messages];
|
||||||
@@ -193,7 +195,7 @@ Examples:
|
|||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
return generateWithValidation({ messages, schema: classificationSchema, chat });
|
return generateWithValidation({ messages, schema: classificationSchema, chat, models: [INTENT_MODEL, FALLBACK_MODEL] });
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Info Response Generation ──
|
// ── Info Response Generation ──
|
||||||
|
|||||||
Reference in New Issue
Block a user