GT AI OS Community Edition v2.0.33
Security hardening release addressing CodeQL and Dependabot alerts:

- Fix stack trace exposure in error responses
- Add SSRF protection with DNS resolution checking
- Implement proper URL hostname validation (replaces substring matching)
- Add centralized path sanitization to prevent path traversal
- Fix ReDoS vulnerability in email validation regex
- Improve HTML sanitization in validation utilities
- Fix capability wildcard matching in auth utilities
- Update glob dependency to address CVE
- Add CodeQL suppression comments for verified false positives

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
apps/control-panel-backend/app/services/default_models.py (new file, +452 lines)
@@ -0,0 +1,452 @@
"""
Default Model Configurations for GT 2.0

This module contains the default configurations for the Groq-hosted models
(11 LLMs and 3 audio models), the BGE-M3 embedding model served on the GT
Edge network, and local Ollama endpoints for on-premise deployments.
"""

from typing import List, Dict, Any

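# Each model entry below follows a common schema:
#   model_id        - provider-side identifier used in API calls
#   name / version  - human-readable label and model version
#   provider        - "groq", "external", or "ollama"
#   model_type      - "llm", "audio", "embedding", or (future) "tts"
#   endpoint        - base URL of the serving API
#   api_key_name    - env var holding the credential (None/omitted for local models)
#   specifications  - context_window and max_tokens (or embedding dimensions)
#   capabilities    - feature flags such as streaming or function_calling
#   cost            - per-1K-token input/output rates (0.0 for local models)
#   is_active       - whether the model is enabled by default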
def get_default_models() -> List[Dict[str, Any]]:
    """Get list of all default model configurations"""

    # Groq LLM Models (11 models)
    groq_llm_models = [
        {
            "model_id": "llama-3.3-70b-versatile",
            "name": "Llama 3.3 70B Versatile",
            "version": "3.3",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 128000,
                "max_tokens": 32768,
            },
            "capabilities": {
                "reasoning": True,
                "function_calling": True,
                "streaming": True,
                "multilingual": True
            },
            "cost": {
                "per_1k_input": 0.59,
                "per_1k_output": 0.79
            },
            "description": "Latest Llama 3.3 70B model optimized for versatile tasks with large context window",
            "is_active": True
        },
        {
            "model_id": "llama-3.3-70b-specdec",
            "name": "Llama 3.3 70B Speculative Decoding",
            "version": "3.3",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 8192,
                "max_tokens": 8192,
            },
            "capabilities": {
                "reasoning": True,
                "function_calling": True,
                "streaming": True
            },
            "cost": {
                "per_1k_input": 0.59,
                "per_1k_output": 0.79
            },
            "description": "Llama 3.3 70B with speculative decoding for faster inference",
            "is_active": True
        },
        {
            "model_id": "llama-3.2-90b-text-preview",
            "name": "Llama 3.2 90B Text Preview",
            "version": "3.2",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 128000,
                "max_tokens": 8000,
            },
            "capabilities": {
                "reasoning": True,
                "function_calling": True,
                "streaming": True
            },
            "cost": {
                "per_1k_input": 0.2,
                "per_1k_output": 0.2
            },
            "description": "Large Llama 3.2 model with enhanced text processing capabilities",
            "is_active": True
        },
        {
            "model_id": "llama-3.1-405b-reasoning",
            "name": "Llama 3.1 405B Reasoning",
            "version": "3.1",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 131072,
                "max_tokens": 32768,
            },
            "capabilities": {
                "reasoning": True,
                "function_calling": True,
                "streaming": True,
                "multilingual": True
            },
            "cost": {
                "per_1k_input": 2.5,
                "per_1k_output": 2.5
            },
            "description": "Largest Llama model optimized for complex reasoning tasks",
            "is_active": True
        },
        {
            "model_id": "llama-3.1-70b-versatile",
            "name": "Llama 3.1 70B Versatile",
            "version": "3.1",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 131072,
                "max_tokens": 32768,
            },
            "capabilities": {
                "reasoning": True,
                "function_calling": True,
                "streaming": True,
                "multilingual": True
            },
            "cost": {
                "per_1k_input": 0.59,
                "per_1k_output": 0.79
            },
            "description": "Balanced Llama model for general-purpose tasks with large context",
            "is_active": True
        },
        {
            "model_id": "llama-3.1-8b-instant",
            "name": "Llama 3.1 8B Instant",
            "version": "3.1",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 131072,
                "max_tokens": 8192,
            },
            "capabilities": {
                "streaming": True,
                "multilingual": True
            },
            "cost": {
                "per_1k_input": 0.05,
                "per_1k_output": 0.08
            },
            "description": "Fast and efficient Llama model for quick responses",
            "is_active": True
        },
        {
            "model_id": "llama3-groq-70b-8192-tool-use-preview",
            "name": "Llama 3 Groq 70B Tool Use Preview",
            "version": "3.0",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 8192,
                "max_tokens": 8192,
            },
            "capabilities": {
                "function_calling": True,
                "streaming": True
            },
            "cost": {
                "per_1k_input": 0.89,
                "per_1k_output": 0.89
            },
            "description": "Llama 3 70B optimized for tool use and function calling",
            "is_active": True
        },
        {
            "model_id": "llama3-groq-8b-8192-tool-use-preview",
            "name": "Llama 3 Groq 8B Tool Use Preview",
            "version": "3.0",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 8192,
                "max_tokens": 8192,
            },
            "capabilities": {
                "function_calling": True,
                "streaming": True
            },
            "cost": {
                "per_1k_input": 0.19,
                "per_1k_output": 0.19
            },
            "description": "Compact Llama 3 model optimized for tool use and function calling",
            "is_active": True
        },
        {
            "model_id": "mixtral-8x7b-32768",
            "name": "Mixtral 8x7B",
            "version": "1.0",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 32768,
                "max_tokens": 32768,
            },
            "capabilities": {
                "reasoning": True,
                "streaming": True,
                "multilingual": True
            },
            "cost": {
                "per_1k_input": 0.24,
                "per_1k_output": 0.24
            },
            "description": "Mixture of experts model with strong multilingual capabilities",
            "is_active": True
        },
        {
            "model_id": "gemma2-9b-it",
            "name": "Gemma 2 9B Instruction Tuned",
            "version": "2.0",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 8192,
                "max_tokens": 8192,
            },
            "capabilities": {
                "streaming": True,
                "multilingual": False
            },
            "cost": {
                "per_1k_input": 0.2,
                "per_1k_output": 0.2
            },
            "description": "Google's Gemma 2 model optimized for instruction following",
            "is_active": True
        },
        {
            "model_id": "llama-guard-3-8b",
            "name": "Llama Guard 3 8B",
            "version": "3.0",
            "provider": "groq",
            "model_type": "llm",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "specifications": {
                "context_window": 8192,
                "max_tokens": 8192,
            },
            "capabilities": {
                "streaming": False,
                "safety_classification": True
            },
            "cost": {
                "per_1k_input": 0.2,
                "per_1k_output": 0.2
            },
            "description": "Safety classification model for content moderation",
            "is_active": True
        }
    ]

    # Groq Audio Models (3 models)
    groq_audio_models = [
        {
            "model_id": "whisper-large-v3",
            "name": "Whisper Large v3",
            "version": "3.0",
            "provider": "groq",
            "model_type": "audio",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "capabilities": {
                "transcription": True,
                "multilingual": True
            },
            "cost": {
                "per_1k_input": 0.111,
                "per_1k_output": 0.111
            },
            "description": "High-quality speech transcription with multilingual support",
            "is_active": True
        },
        {
            "model_id": "whisper-large-v3-turbo",
            "name": "Whisper Large v3 Turbo",
            "version": "3.0",
            "provider": "groq",
            "model_type": "audio",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "capabilities": {
                "transcription": True,
                "multilingual": True
            },
            "cost": {
                "per_1k_input": 0.04,
                "per_1k_output": 0.04
            },
            "description": "Fast speech transcription optimized for speed",
            "is_active": True
        },
        {
            "model_id": "distil-whisper-large-v3-en",
            "name": "Distil-Whisper Large v3 English",
            "version": "3.0",
            "provider": "groq",
            "model_type": "audio",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "capabilities": {
                "transcription": True,
                "multilingual": False
            },
            "cost": {
                "per_1k_input": 0.02,
                "per_1k_output": 0.02
            },
            "description": "Compact English-only transcription model",
            "is_active": True
        }
    ]

    # BGE-M3 Embedding Model (External on GT Edge)
    external_models = [
        {
            "model_id": "bge-m3",
            "name": "BAAI BGE-M3 Multilingual Embeddings",
            "version": "1.0",
            "provider": "external",
            "model_type": "embedding",
            "endpoint": "http://10.0.1.50:8080",  # GT Edge local network
            "specifications": {
                "dimensions": 1024,
                "max_tokens": 8192,
            },
            "capabilities": {
                "multilingual": True,
                "dense_retrieval": True,
                "sparse_retrieval": True,
                "colbert": True
            },
            "cost": {
                "per_1k_input": 0.0,
                "per_1k_output": 0.0
            },
            "description": "State-of-the-art multilingual embedding model running on GT Edge local network",
            "config": {
                "batch_size": 32,
                "normalize": True,
                "pooling_method": "mean"
            },
            "is_active": True
        }
    ]

    # Local Ollama Models (for on-premise deployments)
    ollama_models = [
        {
            "model_id": "ollama-local-dgx-x86",
            "name": "Local Ollama (DGX/X86)",
            "version": "1.0",
            "provider": "ollama",
            "model_type": "llm",
            "endpoint": "http://ollama-host:11434/v1/chat/completions",
            "api_key_name": None,  # No API key needed for local Ollama
            "specifications": {
                "context_window": 131072,
                "max_tokens": 4096,
            },
            "capabilities": {
                "streaming": True,
                "function_calling": False
            },
            "cost": {
                "per_1k_input": 0.0,
                "per_1k_output": 0.0
            },
            "description": "Local Ollama instance for DGX and x86 Linux deployments. Uses ollama-host DNS resolution.",
            "is_active": True
        },
        {
            "model_id": "ollama-local-macos",
            "name": "Local Ollama (MacOS)",
            "version": "1.0",
            "provider": "ollama",
            "model_type": "llm",
            "endpoint": "http://host.docker.internal:11434/v1/chat/completions",
            "api_key_name": None,  # No API key needed for local Ollama
            "specifications": {
                "context_window": 131072,
                "max_tokens": 4096,
            },
            "capabilities": {
                "streaming": True,
                "function_calling": False
            },
            "cost": {
                "per_1k_input": 0.0,
                "per_1k_output": 0.0
            },
            "description": "Local Ollama instance for macOS deployments. Uses host.docker.internal for Docker-to-host networking.",
            "is_active": True
        }
    ]

    # TTS Models (placeholder - will be added when available)
    tts_models = [
        # Future TTS models from Groq/PlayAI
    ]

    # Combine all models
    all_models = groq_llm_models + groq_audio_models + external_models + ollama_models + tts_models

    return all_models


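# Illustrative sketch: the per-1K `cost` rates above are consumed as
# estimated cost = (tokens / 1000) * rate, summed over input and output.
# The helper name `estimate_request_cost` is hypothetical (added here for
# exposition, not part of the original module).
def estimate_request_cost(model: Dict[str, Any], input_tokens: int, output_tokens: int) -> float:
    """Estimate the cost of one request from a model's per-1K token rates (hypothetical helper)."""
    cost = model.get("cost", {})
    return (
        (input_tokens / 1000.0) * cost.get("per_1k_input", 0.0)
        + (output_tokens / 1000.0) * cost.get("per_1k_output", 0.0)
    )

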
def get_groq_models() -> List[Dict[str, Any]]:
    """Get only Groq models"""
    return [model for model in get_default_models() if model["provider"] == "groq"]


def get_external_models() -> List[Dict[str, Any]]:
    """Get only external models (BGE-M3, etc.)"""
    return [model for model in get_default_models() if model["provider"] == "external"]


def get_ollama_models() -> List[Dict[str, Any]]:
    """Get only Ollama models (local inference)"""
    return [model for model in get_default_models() if model["provider"] == "ollama"]


def get_models_by_type(model_type: str) -> List[Dict[str, Any]]:
    """Get models by type (llm, embedding, audio, tts)"""
    return [model for model in get_default_models() if model["model_type"] == model_type]
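

# Illustrative usage sketch (hypothetical __main__ demo, not part of the
# original module): exercises the accessors and the cost helper above.
if __name__ == "__main__":
    models = get_default_models()
    print(f"{len(models)} default models configured")
    print(f"{len(get_groq_models())} from Groq, {len(get_ollama_models())} local Ollama")
    print(f"{len(get_models_by_type('llm'))} LLMs, {len(get_models_by_type('audio'))} audio models")

    # Estimate the cost of a 10K-token-in / 2K-token-out request on Llama 3.3 70B.
    llama = next(m for m in models if m["model_id"] == "llama-3.3-70b-versatile")
    print(f"Estimated request cost: {estimate_request_cost(llama, 10_000, 2_000):.4f}")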