fix: correct max_tokens for Groq models

🤖 Generated with [Claude Code](https://claude.com/claude-code)
This commit is contained in:
HackWeasel
2025-12-16 09:06:59 -05:00
parent dc884df271
commit 81be5f6db8
4 changed files with 48 additions and 3 deletions

View File

@@ -305,6 +305,15 @@ check_migration_027() {
[ "$missing" != "0" ] && [ -n "$missing" ]
}
check_migration_028() {
    # Returns success (i.e. "migration needed") when any Groq model row has a
    # wrong max_tokens value in the admin DB.
    #
    # Uses IS DISTINCT FROM rather than != so that rows whose max_tokens is
    # NULL are also flagged: in SQL, NULL != 32000 evaluates to NULL (not
    # true), which would silently exclude those rows from the count and the
    # migration would never run for them.
    #
    # On docker/psql failure the fallback echoes "0", which reads as
    # "no migration needed" — deliberate best-effort behavior, kept as-is.
    local wrong_value=$(docker exec gentwo-controlpanel-postgres psql -U postgres -d gt2_admin -tAc \
        "SELECT COUNT(*) FROM model_configs WHERE
         (model_id = 'llama-3.1-8b-instant' AND max_tokens IS DISTINCT FROM 32000) OR
         (model_id = 'meta-llama/llama-guard-4-12b' AND max_tokens IS DISTINCT FROM 1024);" 2>/dev/null || echo "0")
    # Non-zero, non-empty count -> at least one row needs fixing.
    [ "$wrong_value" != "0" ] && [ -n "$wrong_value" ]
}
# Tenant migration checks
check_migration_T001() {
local exists=$(docker exec gentwo-tenant-postgres-primary psql -U postgres -d gt2_tenants -tAc \
@@ -411,6 +420,9 @@ run_admin_migrations() {
# This fixes partial 021 migrations where models were added but not assigned
run_admin_migration "027" "scripts/migrations/027_assign_nvidia_models_to_tenants.sql" "check_migration_027" || return 1
# Migration 028: Fix Groq model max_tokens (llama-3.1-8b-instant and llama-guard)
run_admin_migration "028" "scripts/migrations/028_fix_groq_max_tokens.sql" "check_migration_028" || return 1
log_success "All admin migrations complete"
return 0
}

View File

@@ -11,7 +11,7 @@ BEGIN
-- LLaMA 3.1 8B Instant
UPDATE model_configs
SET context_window = 131072,
max_tokens = 131072,
max_tokens = 32000,
updated_at = NOW()
WHERE model_id = 'llama-3.1-8b-instant'
AND (context_window IS NULL OR max_tokens IS NULL);

View File

@@ -0,0 +1,33 @@
-- Migration 028: Fix Groq Model Max Tokens
-- Corrects max_tokens for models that had incorrect values in test data
DO $$
DECLARE
    updated_count INTEGER := 0;
BEGIN
    -- LLaMA 3.1 8B Instant: max_tokens must be 32000 (test data had 131072).
    -- Predicate uses IS DISTINCT FROM instead of != so rows where max_tokens
    -- is NULL are corrected too: NULL != 32000 evaluates to NULL under SQL
    -- three-valued logic, and the UPDATE would silently skip those rows.
    UPDATE model_configs
    SET max_tokens = 32000,
        updated_at = NOW()
    WHERE model_id = 'llama-3.1-8b-instant'
      AND max_tokens IS DISTINCT FROM 32000;

    GET DIAGNOSTICS updated_count = ROW_COUNT;
    IF updated_count > 0 THEN
        RAISE NOTICE 'Updated % records for llama-3.1-8b-instant max_tokens -> 32000', updated_count;
    END IF;

    -- LLaMA Guard 4 12B: max_tokens must be 1024 (test data had 8192).
    -- Same NULL-safe comparison as above.
    UPDATE model_configs
    SET max_tokens = 1024,
        updated_at = NOW()
    WHERE model_id = 'meta-llama/llama-guard-4-12b'
      AND max_tokens IS DISTINCT FROM 1024;

    GET DIAGNOSTICS updated_count = ROW_COUNT;
    IF updated_count > 0 THEN
        RAISE NOTICE 'Updated % records for llama-guard-4-12b max_tokens -> 1024', updated_count;
    END IF;

    RAISE NOTICE 'Migration 028 complete: Groq max_tokens corrected';
END $$;

View File

@@ -94,7 +94,7 @@ INSERT INTO public.model_configs (
-- Groq Llama 3.1 8B Instant (fast, cheap)
('llama-3.1-8b-instant', 'Groq Llama 3.1 8b Instant', '1.0', 'groq', 'llm',
'https://api.groq.com/openai/v1/chat/completions',
131072, 131072,
131072, 32000,
'{"reasoning": false, "function_calling": false, "vision": false, "audio": false, "streaming": false, "multilingual": false}'::json,
0.05, 0.08, true, 'unknown', 0, 0, 100, 0,
'{"global_access": true}'::json, '[]'::json,
@@ -148,7 +148,7 @@ INSERT INTO public.model_configs (
-- Groq Llama Guard 4 12B (safety/moderation model)
('meta-llama/llama-guard-4-12b', 'Groq Llama Guard 4 12B', '1.0', 'groq', 'llm',
'https://api.groq.com/openai/v1/chat/completions',
131072, 8192,
131072, 1024,
'{"reasoning": false, "function_calling": false, "vision": false, "audio": false, "streaming": false, "multilingual": false}'::json,
0.20, 0.20, true, 'unknown', 0, 0, 100, 0,
'{"global_access": true}'::json, '[]'::json,