From 81be5f6db82d86265611a42ae69f1a4b355d63a7 Mon Sep 17 00:00:00 2001
From: HackWeasel
Date: Tue, 16 Dec 2025 09:06:59 -0500
Subject: [PATCH] fix: correct max_tokens for Groq models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🤖 Generated with [Claude Code](https://claude.com/claude-code)
---
Review notes (this section sits below the "---" cut line and is not part of
the commit message):

  * Line breaks were restored so that each mail header, diffstat row, hunk
    header and hunk line is on its own line again. Leading whitespace inside
    hunk lines was reconstructed with conventional 4-space indentation and is
    NOT verified against the target files; if a context line fails to match,
    apply with `git apply --ignore-whitespace`.
  * The max_tokens comparisons in check_migration_028 and in migration 028
    now use IS DISTINCT FROM instead of !=, so rows whose max_tokens is NULL
    are detected and corrected as well (NULL != 32000 is UNKNOWN, not TRUE).
    The blob ids on the "index" lines predate this amendment.

 scripts/lib/migrations.sh                     | 12 +++++++
 .../010_update_model_context_windows.sql      |  2 +-
 .../migrations/028_fix_groq_max_tokens.sql    | 33 +++++++++++++++++++
 .../unified/05-create-test-data.sql           |  4 +--
 4 files changed, 48 insertions(+), 3 deletions(-)
 create mode 100644 scripts/migrations/028_fix_groq_max_tokens.sql

diff --git a/scripts/lib/migrations.sh b/scripts/lib/migrations.sh
index 0502a19..a38ac3c 100755
--- a/scripts/lib/migrations.sh
+++ b/scripts/lib/migrations.sh
@@ -305,6 +305,15 @@ check_migration_027() {
     [ "$missing" != "0" ] && [ -n "$missing" ]
 }
 
+check_migration_028() {
+    # Returns true (needs migration) if any model has wrong max_tokens
+    local wrong_value=$(docker exec gentwo-controlpanel-postgres psql -U postgres -d gt2_admin -tAc \
+        "SELECT COUNT(*) FROM model_configs WHERE
+        (model_id = 'llama-3.1-8b-instant' AND max_tokens IS DISTINCT FROM 32000) OR
+        (model_id = 'meta-llama/llama-guard-4-12b' AND max_tokens IS DISTINCT FROM 1024);" 2>/dev/null || echo "0")
+    [ "$wrong_value" != "0" ] && [ -n "$wrong_value" ]
+}
+
 # Tenant migration checks
 check_migration_T001() {
     local exists=$(docker exec gentwo-tenant-postgres-primary psql -U postgres -d gt2_tenants -tAc \
@@ -411,6 +420,9 @@ run_admin_migrations() {
     # This fixes partial 021 migrations where models were added but not assigned
     run_admin_migration "027" "scripts/migrations/027_assign_nvidia_models_to_tenants.sql" "check_migration_027" || return 1
 
+    # Migration 028: Fix Groq model max_tokens (llama-3.1-8b-instant and llama-guard)
+    run_admin_migration "028" "scripts/migrations/028_fix_groq_max_tokens.sql" "check_migration_028" || return 1
+
     log_success "All admin migrations complete"
     return 0
 }
diff --git a/scripts/migrations/010_update_model_context_windows.sql b/scripts/migrations/010_update_model_context_windows.sql
index fce9bc7..2a10ac4 100644
--- a/scripts/migrations/010_update_model_context_windows.sql
+++ b/scripts/migrations/010_update_model_context_windows.sql
@@ -11,7 +11,7 @@ BEGIN
     -- LLaMA 3.1 8B Instant
     UPDATE model_configs
     SET context_window = 131072,
-        max_tokens = 131072,
+        max_tokens = 32000,
         updated_at = NOW()
     WHERE model_id = 'llama-3.1-8b-instant'
       AND (context_window IS NULL OR max_tokens IS NULL);
diff --git a/scripts/migrations/028_fix_groq_max_tokens.sql b/scripts/migrations/028_fix_groq_max_tokens.sql
new file mode 100644
index 0000000..498d3ac
--- /dev/null
+++ b/scripts/migrations/028_fix_groq_max_tokens.sql
@@ -0,0 +1,33 @@
+-- Migration 028: Fix Groq Model Max Tokens
+-- Corrects max_tokens for models that had incorrect values in test data
+
+DO $$
+DECLARE
+    updated_count INTEGER := 0;
+BEGIN
+    -- LLaMA 3.1 8B Instant: max_tokens should be 32000 (was incorrectly 131072)
+    UPDATE model_configs
+    SET max_tokens = 32000,
+        updated_at = NOW()
+    WHERE model_id = 'llama-3.1-8b-instant'
+      AND max_tokens IS DISTINCT FROM 32000;
+
+    GET DIAGNOSTICS updated_count = ROW_COUNT;
+    IF updated_count > 0 THEN
+        RAISE NOTICE 'Updated % records for llama-3.1-8b-instant max_tokens -> 32000', updated_count;
+    END IF;
+
+    -- LLaMA Guard 4 12B: max_tokens should be 1024 (was incorrectly 8192 in test data)
+    UPDATE model_configs
+    SET max_tokens = 1024,
+        updated_at = NOW()
+    WHERE model_id = 'meta-llama/llama-guard-4-12b'
+      AND max_tokens IS DISTINCT FROM 1024;
+
+    GET DIAGNOSTICS updated_count = ROW_COUNT;
+    IF updated_count > 0 THEN
+        RAISE NOTICE 'Updated % records for llama-guard-4-12b max_tokens -> 1024', updated_count;
+    END IF;
+
+    RAISE NOTICE 'Migration 028 complete: Groq max_tokens corrected';
+END $$;
diff --git a/scripts/postgresql/unified/05-create-test-data.sql b/scripts/postgresql/unified/05-create-test-data.sql
index 1dc1ff3..2ad43e2 100644
--- a/scripts/postgresql/unified/05-create-test-data.sql
+++ b/scripts/postgresql/unified/05-create-test-data.sql
@@ -94,7 +94,7 @@ INSERT INTO public.model_configs (
     -- Groq Llama 3.1 8B Instant (fast, cheap)
     ('llama-3.1-8b-instant', 'Groq Llama 3.1 8b Instant', '1.0', 'groq', 'llm',
     'https://api.groq.com/openai/v1/chat/completions',
-    131072, 131072,
+    131072, 32000,
     '{"reasoning": false, "function_calling": false, "vision": false, "audio": false, "streaming": false, "multilingual": false}'::json,
     0.05, 0.08, true, 'unknown', 0, 0, 100, 0,
     '{"global_access": true}'::json, '[]'::json,
@@ -148,7 +148,7 @@ INSERT INTO public.model_configs (
     -- Groq Llama Guard 4 12B (safety/moderation model)
     ('meta-llama/llama-guard-4-12b', 'Groq Llama Guard 4 12B', '1.0', 'groq', 'llm',
     'https://api.groq.com/openai/v1/chat/completions',
-    131072, 8192,
+    131072, 1024,
     '{"reasoning": false, "function_calling": false, "vision": false, "audio": false, "streaming": false, "multilingual": false}'::json,
     0.20, 0.20, true, 'unknown', 0, 0, 100, 0,
     '{"global_access": true}'::json, '[]'::json,