GT AI OS Community Edition v2.0.33
Security hardening release addressing CodeQL and Dependabot alerts:

- Fix stack trace exposure in error responses
- Add SSRF protection with DNS resolution checking
- Implement proper URL hostname validation (replaces substring matching)
- Add centralized path sanitization to prevent path traversal
- Fix ReDoS vulnerability in email validation regex
- Improve HTML sanitization in validation utilities
- Fix capability wildcard matching in auth utilities
- Update glob dependency to address CVE
- Add CodeQL suppression comments for verified false positives

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
apps/control-panel-backend/app/services/default_models.py (new file, +452 lines)
@@ -0,0 +1,452 @@
"""
Default Model Configurations for GT 2.0

This module contains the default configurations for the Groq-hosted models
(11 LLMs and 3 audio models), the BGE-M3 embedding model served on the GT
Edge network, and local Ollama endpoints for on-premise deployments.
"""

from typing import List, Dict, Any

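# Each model entry below follows a common schema:
#   model_id        - provider-side identifier used in API calls
#   name / version  - human-readable label and model version
#   provider        - "groq", "external", or "ollama"
#   model_type      - "llm", "audio", "embedding", or (future) "tts"
#   endpoint        - base URL of the serving API
#   api_key_name    - env var holding the credential (None/omitted for local models)
#   specifications  - context_window and max_tokens (or embedding dimensions)
#   capabilities    - feature flags such as streaming or function_calling
#   cost            - per-1K-token input/output rates (0.0 for local models)
#   is_active       - whether the model is enabled by default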
def get_default_models() -> List[Dict[str, Any]]:
    """Get list of all default model configurations"""

    # Groq LLM Models (11 models)
    groq_llm_models = [
        {
            "model_id": "llama-3.3-70b-versatile",
            "name": "Llama 3.3 70B Versatile",
            "version": "3.3",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 128000,
                "max_tokens": 32768,
            },
            "capabilities": {
                "reasoning": True,
                "function_calling": True,
                "streaming": True,
                "multilingual": True
            },
            "cost": {
                "per_1k_input": 0.59,
                "per_1k_output": 0.79
            },
            "description": "Latest Llama 3.3 70B model optimized for versatile tasks with large context window",
            "is_active": True
        },
        {
            "model_id": "llama-3.3-70b-specdec",
            "name": "Llama 3.3 70B Speculative Decoding",
            "version": "3.3",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 8192,
                "max_tokens": 8192,
            },
            "capabilities": {
                "reasoning": True,
                "function_calling": True,
                "streaming": True
            },
            "cost": {
                "per_1k_input": 0.59,
                "per_1k_output": 0.79
            },
            "description": "Llama 3.3 70B with speculative decoding for faster inference",
            "is_active": True
        },
        {
            "model_id": "llama-3.2-90b-text-preview",
            "name": "Llama 3.2 90B Text Preview",
            "version": "3.2",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 128000,
                "max_tokens": 8000,
            },
            "capabilities": {
                "reasoning": True,
                "function_calling": True,
                "streaming": True
            },
            "cost": {
                "per_1k_input": 0.2,
                "per_1k_output": 0.2
            },
            "description": "Large Llama 3.2 model with enhanced text processing capabilities",
            "is_active": True
        },
        {
            "model_id": "llama-3.1-405b-reasoning",
            "name": "Llama 3.1 405B Reasoning",
            "version": "3.1",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 131072,
                "max_tokens": 32768,
            },
            "capabilities": {
                "reasoning": True,
                "function_calling": True,
                "streaming": True,
                "multilingual": True
            },
            "cost": {
                "per_1k_input": 2.5,
                "per_1k_output": 2.5
            },
            "description": "Largest Llama model optimized for complex reasoning tasks",
            "is_active": True
        },
        {
            "model_id": "llama-3.1-70b-versatile",
            "name": "Llama 3.1 70B Versatile",
            "version": "3.1",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 131072,
                "max_tokens": 32768,
            },
            "capabilities": {
                "reasoning": True,
                "function_calling": True,
                "streaming": True,
                "multilingual": True
            },
            "cost": {
                "per_1k_input": 0.59,
                "per_1k_output": 0.79
            },
            "description": "Balanced Llama model for general-purpose tasks with large context",
            "is_active": True
        },
        {
            "model_id": "llama-3.1-8b-instant",
            "name": "Llama 3.1 8B Instant",
            "version": "3.1",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 131072,
                "max_tokens": 8192,
            },
            "capabilities": {
                "streaming": True,
                "multilingual": True
            },
            "cost": {
                "per_1k_input": 0.05,
                "per_1k_output": 0.08
            },
            "description": "Fast and efficient Llama model for quick responses",
            "is_active": True
        },
        {
            "model_id": "llama3-groq-70b-8192-tool-use-preview",
            "name": "Llama 3 Groq 70B Tool Use Preview",
            "version": "3.0",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 8192,
                "max_tokens": 8192,
            },
            "capabilities": {
                "function_calling": True,
                "streaming": True
            },
            "cost": {
                "per_1k_input": 0.89,
                "per_1k_output": 0.89
            },
            "description": "Llama 3 70B optimized for tool use and function calling",
            "is_active": True
        },
        {
            "model_id": "llama3-groq-8b-8192-tool-use-preview",
            "name": "Llama 3 Groq 8B Tool Use Preview",
            "version": "3.0",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 8192,
                "max_tokens": 8192,
            },
            "capabilities": {
                "function_calling": True,
                "streaming": True
            },
            "cost": {
                "per_1k_input": 0.19,
                "per_1k_output": 0.19
            },
            "description": "Compact Llama 3 model optimized for tool use and function calling",
            "is_active": True
        },
        {
            "model_id": "mixtral-8x7b-32768",
            "name": "Mixtral 8x7B",
            "version": "1.0",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 32768,
                "max_tokens": 32768,
            },
            "capabilities": {
                "reasoning": True,
                "streaming": True,
                "multilingual": True
            },
            "cost": {
                "per_1k_input": 0.24,
                "per_1k_output": 0.24
            },
            "description": "Mixture of experts model with strong multilingual capabilities",
            "is_active": True
        },
        {
            "model_id": "gemma2-9b-it",
            "name": "Gemma 2 9B Instruction Tuned",
            "version": "2.0",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 8192,
                "max_tokens": 8192,
            },
            "capabilities": {
                "streaming": True,
                "multilingual": False
            },
            "cost": {
                "per_1k_input": 0.2,
                "per_1k_output": 0.2
            },
            "description": "Google's Gemma 2 model optimized for instruction following",
            "is_active": True
        },
        {
            "model_id": "llama-guard-3-8b",
            "name": "Llama Guard 3 8B",
            "version": "3.0",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 8192,
                "max_tokens": 8192,
            },
            "capabilities": {
                "streaming": False,
                "safety_classification": True
            },
            "cost": {
                "per_1k_input": 0.2,
                "per_1k_output": 0.2
            },
            "description": "Safety classification model for content moderation",
            "is_active": True
        }
    ]

    # Groq Audio Models (3 models)
    groq_audio_models = [
        {
            "model_id": "whisper-large-v3",
            "name": "Whisper Large v3",
            "version": "3.0",
            "provider": "groq",
            "model_type": "audio",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "capabilities": {
                "transcription": True,
                "multilingual": True
            },
            "cost": {
                "per_1k_input": 0.111,
                "per_1k_output": 0.111
            },
            "description": "High-quality speech transcription with multilingual support",
            "is_active": True
        },
        {
            "model_id": "whisper-large-v3-turbo",
            "name": "Whisper Large v3 Turbo",
            "version": "3.0",
            "provider": "groq",
            "model_type": "audio",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "capabilities": {
                "transcription": True,
                "multilingual": True
            },
            "cost": {
                "per_1k_input": 0.04,
                "per_1k_output": 0.04
            },
            "description": "Fast speech transcription optimized for speed",
            "is_active": True
        },
        {
            "model_id": "distil-whisper-large-v3-en",
            "name": "Distil-Whisper Large v3 English",
            "version": "3.0",
            "provider": "groq",
            "model_type": "audio",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "capabilities": {
                "transcription": True,
                "multilingual": False
            },
            "cost": {
                "per_1k_input": 0.02,
                "per_1k_output": 0.02
            },
            "description": "Compact English-only transcription model",
            "is_active": True
        }
    ]

    # BGE-M3 Embedding Model (External on GT Edge)
    external_models = [
        {
            "model_id": "bge-m3",
            "name": "BAAI BGE-M3 Multilingual Embeddings",
            "version": "1.0",
            "provider": "external",
            "model_type": "embedding",
            "endpoint": "http://10.0.1.50:8080",  # GT Edge local network
            "specifications": {
                "dimensions": 1024,
                "max_tokens": 8192,
            },
            "capabilities": {
                "multilingual": True,
                "dense_retrieval": True,
                "sparse_retrieval": True,
                "colbert": True
            },
            "cost": {
                "per_1k_input": 0.0,
                "per_1k_output": 0.0
            },
            "description": "State-of-the-art multilingual embedding model running on GT Edge local network",
            "config": {
                "batch_size": 32,
                "normalize": True,
                "pooling_method": "mean"
            },
            "is_active": True
        }
    ]

    # Local Ollama Models (for on-premise deployments)
    ollama_models = [
        {
            "model_id": "ollama-local-dgx-x86",
            "name": "Local Ollama (DGX/X86)",
            "version": "1.0",
            "provider": "ollama",
            "model_type": "llm",
            "endpoint": "http://ollama-host:11434/v1/chat/completions",
            "api_key_name": None,  # No API key needed for local Ollama
            "specifications": {
                "context_window": 131072,
                "max_tokens": 4096,
            },
            "capabilities": {
                "streaming": True,
                "function_calling": False
            },
            "cost": {
                "per_1k_input": 0.0,
                "per_1k_output": 0.0
            },
            "description": "Local Ollama instance for DGX and x86 Linux deployments. Uses ollama-host DNS resolution.",
            "is_active": True
        },
        {
            "model_id": "ollama-local-macos",
            "name": "Local Ollama (MacOS)",
            "version": "1.0",
            "provider": "ollama",
            "model_type": "llm",
            "endpoint": "http://host.docker.internal:11434/v1/chat/completions",
            "api_key_name": None,  # No API key needed for local Ollama
            "specifications": {
                "context_window": 131072,
                "max_tokens": 4096,
            },
            "capabilities": {
                "streaming": True,
                "function_calling": False
            },
            "cost": {
                "per_1k_input": 0.0,
                "per_1k_output": 0.0
            },
            "description": "Local Ollama instance for macOS deployments. Uses host.docker.internal for Docker-to-host networking.",
            "is_active": True
        }
    ]

    # TTS Models (placeholder - will be added when available)
    tts_models = [
        # Future TTS models from Groq/PlayAI
    ]

    # Combine all models
    all_models = groq_llm_models + groq_audio_models + external_models + ollama_models + tts_models

    return all_models


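# Illustrative sketch: the per-1K `cost` rates above are consumed as
# estimated cost = (tokens / 1000) * rate, summed over input and output.
# The helper name `estimate_request_cost` is hypothetical (added here for
# exposition, not part of the original module).
def estimate_request_cost(model: Dict[str, Any], input_tokens: int, output_tokens: int) -> float:
    """Estimate the cost of one request from a model's per-1K token rates (hypothetical helper)."""
    cost = model.get("cost", {})
    return (
        (input_tokens / 1000.0) * cost.get("per_1k_input", 0.0)
        + (output_tokens / 1000.0) * cost.get("per_1k_output", 0.0)
    )

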
def get_groq_models() -> List[Dict[str, Any]]:
    """Get only Groq models"""
    return [model for model in get_default_models() if model["provider"] == "groq"]


def get_external_models() -> List[Dict[str, Any]]:
    """Get only external models (BGE-M3, etc.)"""
    return [model for model in get_default_models() if model["provider"] == "external"]


def get_ollama_models() -> List[Dict[str, Any]]:
    """Get only Ollama models (local inference)"""
    return [model for model in get_default_models() if model["provider"] == "ollama"]


def get_models_by_type(model_type: str) -> List[Dict[str, Any]]:
    """Get models by type (llm, embedding, audio, tts)"""
    return [model for model in get_default_models() if model["model_type"] == model_type]
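

# Illustrative usage sketch (hypothetical __main__ demo, not part of the
# original module): exercises the accessors and the cost helper above.
if __name__ == "__main__":
    models = get_default_models()
    print(f"{len(models)} default models configured")
    print(f"{len(get_groq_models())} from Groq, {len(get_ollama_models())} local Ollama")
    print(f"{len(get_models_by_type('llm'))} LLMs, {len(get_models_by_type('audio'))} audio models")

    # Estimate the cost of a 10K-token-in / 2K-token-out request on Llama 3.3 70B.
    llama = next(m for m in models if m["model_id"] == "llama-3.3-70b-versatile")
    print(f"Estimated request cost: {estimate_request_cost(llama, 10_000, 2_000):.4f}")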