gt-ai-os-community/apps/control-panel-backend/app/services/default_models.py
HackWeasel 310491a557 GT AI OS Community v2.0.33 - Add NVIDIA NIM and Nemotron agents
- Updated python_coding_microproject.csv to use NVIDIA NIM Kimi K2
- Updated kali_linux_shell_simulator.csv to use NVIDIA NIM Kimi K2
  - Made more general-purpose (flexible targets, expanded tools)
- Added nemotron-mini-agent.csv for fast local inference via Ollama
- Added nemotron-agent.csv for advanced reasoning via Ollama
- Added wiki page: Projects for NVIDIA NIMs and Nemotron
2025-12-12 17:47:14 -05:00


"""
Default Model Configurations for GT 2.0
This module contains the default configuration for all 19 Groq models
plus the BGE-M3 embedding model on GT Edge network.
"""
from typing import List, Dict, Any
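
# Each default model configuration is a plain dict; most entries share this shape:
#   model_id / name / version  - identifiers surfaced in the control panel
#   provider                   - "groq", "external", or "ollama"
#   model_type                 - "llm", "audio", "embedding", or "tts"
#   endpoint / api_key_name    - API base URL and the env var holding its key
#                                (omitted or None for local/on-prem models)
#   specifications             - context_window / max_tokens (plus embedding
#                                dimensions for BGE-M3)
#   capabilities               - boolean feature flags (streaming, reasoning,
#                                function_calling, multilingual, ...)
#   cost                       - per_1k_input / per_1k_output pricing
#                                (0.0 for local models)
#   description / is_active    - human-readable summary and enablement flag
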
def get_default_models() -> List[Dict[str, Any]]:
"""Get list of all default model configurations"""
# Groq LLM Models (11 models)
groq_llm_models = [
{
"model_id": "llama-3.3-70b-versatile",
"name": "Llama 3.3 70B Versatile",
"version": "3.3",
"provider": "groq",
"model_type": "llm",
"endpoint": "https://api.groq.com/openai/v1",
"api_key_name": "GROQ_API_KEY",
"specifications": {
"context_window": 128000,
"max_tokens": 32768,
},
"capabilities": {
"reasoning": True,
"function_calling": True,
"streaming": True,
"multilingual": True
},
"cost": {
"per_1k_input": 0.59,
"per_1k_output": 0.79
},
"description": "Latest Llama 3.3 70B model optimized for versatile tasks with large context window",
"is_active": True
},
{
"model_id": "llama-3.3-70b-specdec",
"name": "Llama 3.3 70B Speculative Decoding",
"version": "3.3",
"provider": "groq",
"model_type": "llm",
"endpoint": "https://api.groq.com/openai/v1",
"api_key_name": "GROQ_API_KEY",
"specifications": {
"context_window": 8192,
"max_tokens": 8192,
},
"capabilities": {
"reasoning": True,
"function_calling": True,
"streaming": True
},
"cost": {
"per_1k_input": 0.59,
"per_1k_output": 0.79
},
"description": "Llama 3.3 70B with speculative decoding for faster inference",
"is_active": True
},
{
"model_id": "llama-3.2-90b-text-preview",
"name": "Llama 3.2 90B Text Preview",
"version": "3.2",
"provider": "groq",
"model_type": "llm",
"endpoint": "https://api.groq.com/openai/v1",
"api_key_name": "GROQ_API_KEY",
"specifications": {
"context_window": 128000,
"max_tokens": 8000,
},
"capabilities": {
"reasoning": True,
"function_calling": True,
"streaming": True
},
"cost": {
"per_1k_input": 0.2,
"per_1k_output": 0.2
},
"description": "Large Llama 3.2 model with enhanced text processing capabilities",
"is_active": True
},
{
"model_id": "llama-3.1-405b-reasoning",
"name": "Llama 3.1 405B Reasoning",
"version": "3.1",
"provider": "groq",
"model_type": "llm",
"endpoint": "https://api.groq.com/openai/v1",
"api_key_name": "GROQ_API_KEY",
"specifications": {
"context_window": 131072,
"max_tokens": 32768,
},
"capabilities": {
"reasoning": True,
"function_calling": True,
"streaming": True,
"multilingual": True
},
"cost": {
"per_1k_input": 2.5,
"per_1k_output": 2.5
},
"description": "Largest Llama model optimized for complex reasoning tasks",
"is_active": True
},
{
"model_id": "llama-3.1-70b-versatile",
"name": "Llama 3.1 70B Versatile",
"version": "3.1",
"provider": "groq",
"model_type": "llm",
"endpoint": "https://api.groq.com/openai/v1",
"api_key_name": "GROQ_API_KEY",
"specifications": {
"context_window": 131072,
"max_tokens": 32768,
},
"capabilities": {
"reasoning": True,
"function_calling": True,
"streaming": True,
"multilingual": True
},
"cost": {
"per_1k_input": 0.59,
"per_1k_output": 0.79
},
"description": "Balanced Llama model for general-purpose tasks with large context",
"is_active": True
},
{
"model_id": "llama-3.1-8b-instant",
"name": "Llama 3.1 8B Instant",
"version": "3.1",
"provider": "groq",
"model_type": "llm",
"endpoint": "https://api.groq.com/openai/v1",
"api_key_name": "GROQ_API_KEY",
"specifications": {
"context_window": 131072,
"max_tokens": 8192,
},
"capabilities": {
"streaming": True,
"multilingual": True
},
"cost": {
"per_1k_input": 0.05,
"per_1k_output": 0.08
},
"description": "Fast and efficient Llama model for quick responses",
"is_active": True
},
{
"model_id": "llama3-groq-70b-8192-tool-use-preview",
"name": "Llama 3 Groq 70B Tool Use Preview",
"version": "3.0",
"provider": "groq",
"model_type": "llm",
"endpoint": "https://api.groq.com/openai/v1",
"api_key_name": "GROQ_API_KEY",
"specifications": {
"context_window": 8192,
"max_tokens": 8192,
},
"capabilities": {
"function_calling": True,
"streaming": True
},
"cost": {
"per_1k_input": 0.89,
"per_1k_output": 0.89
},
"description": "Llama 3 70B optimized for tool use and function calling",
"is_active": True
},
{
"model_id": "llama3-groq-8b-8192-tool-use-preview",
"name": "Llama 3 Groq 8B Tool Use Preview",
"version": "3.0",
"provider": "groq",
"model_type": "llm",
"endpoint": "https://api.groq.com/openai/v1",
"api_key_name": "GROQ_API_KEY",
"specifications": {
"context_window": 8192,
"max_tokens": 8192,
},
"capabilities": {
"function_calling": True,
"streaming": True
},
"cost": {
"per_1k_input": 0.19,
"per_1k_output": 0.19
},
"description": "Compact Llama 3 model optimized for tool use and function calling",
"is_active": True
},
{
"model_id": "mixtral-8x7b-32768",
"name": "Mixtral 8x7B",
"version": "1.0",
"provider": "groq",
"model_type": "llm",
"endpoint": "https://api.groq.com/openai/v1",
"api_key_name": "GROQ_API_KEY",
"specifications": {
"context_window": 32768,
"max_tokens": 32768,
},
"capabilities": {
"reasoning": True,
"streaming": True,
"multilingual": True
},
"cost": {
"per_1k_input": 0.24,
"per_1k_output": 0.24
},
"description": "Mixture of experts model with strong multilingual capabilities",
"is_active": True
},
{
"model_id": "gemma2-9b-it",
"name": "Gemma 2 9B Instruction Tuned",
"version": "2.0",
"provider": "groq",
"model_type": "llm",
"endpoint": "https://api.groq.com/openai/v1",
"api_key_name": "GROQ_API_KEY",
"specifications": {
"context_window": 8192,
"max_tokens": 8192,
},
"capabilities": {
"streaming": True,
"multilingual": False
},
"cost": {
"per_1k_input": 0.2,
"per_1k_output": 0.2
},
"description": "Google's Gemma 2 model optimized for instruction following",
"is_active": True
},
{
"model_id": "llama-guard-3-8b",
"name": "Llama Guard 3 8B",
"version": "3.0",
"provider": "groq",
"model_type": "llm",
"endpoint": "https://api.groq.com/openai/v1",
"api_key_name": "GROQ_API_KEY",
"specifications": {
"context_window": 8192,
"max_tokens": 8192,
},
"capabilities": {
"streaming": False,
"safety_classification": True
},
"cost": {
"per_1k_input": 0.2,
"per_1k_output": 0.2
},
"description": "Safety classification model for content moderation",
"is_active": True
}
]
    # Groq Audio Models (3 models)
    groq_audio_models = [
        {
            "model_id": "whisper-large-v3",
            "name": "Whisper Large v3",
            "version": "3.0",
            "provider": "groq",
            "model_type": "audio",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "capabilities": {
                "transcription": True,
                "multilingual": True
            },
            "cost": {
                "per_1k_input": 0.111,
                "per_1k_output": 0.111
            },
            "description": "High-quality speech transcription with multilingual support",
            "is_active": True
        },
        {
            "model_id": "whisper-large-v3-turbo",
            "name": "Whisper Large v3 Turbo",
            "version": "3.0",
            "provider": "groq",
            "model_type": "audio",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "capabilities": {
                "transcription": True,
                "multilingual": True
            },
            "cost": {
                "per_1k_input": 0.04,
                "per_1k_output": 0.04
            },
            "description": "Fast speech transcription optimized for speed",
            "is_active": True
        },
        {
            "model_id": "distil-whisper-large-v3-en",
            "name": "Distil-Whisper Large v3 English",
            "version": "3.0",
            "provider": "groq",
            "model_type": "audio",
            "endpoint": "https://api.groq.com/openai/v1",
            "api_key_name": "GROQ_API_KEY",
            "capabilities": {
                "transcription": True,
                "multilingual": False
            },
            "cost": {
                "per_1k_input": 0.02,
                "per_1k_output": 0.02
            },
            "description": "Compact English-only transcription model",
            "is_active": True
        }
    ]
    # BGE-M3 Embedding Model (External on GT Edge)
    external_models = [
        {
            "model_id": "bge-m3",
            "name": "BAAI BGE-M3 Multilingual Embeddings",
            "version": "1.0",
            "provider": "external",
            "model_type": "embedding",
            "endpoint": "http://10.0.1.50:8080",  # GT Edge local network
            "specifications": {
                "dimensions": 1024,
                "max_tokens": 8192,
            },
            "capabilities": {
                "multilingual": True,
                "dense_retrieval": True,
                "sparse_retrieval": True,
                "colbert": True
            },
            "cost": {
                "per_1k_input": 0.0,
                "per_1k_output": 0.0
            },
            "description": "State-of-the-art multilingual embedding model running on GT Edge local network",
            "config": {
                "batch_size": 32,
                "normalize": True,
                "pooling_method": "mean"
            },
            "is_active": True
        }
    ]
    # Local Ollama Models (for on-premise deployments)
    ollama_models = [
        {
            "model_id": "ollama-local-dgx-x86",
            "name": "Local Ollama (DGX/X86)",
            "version": "1.0",
            "provider": "ollama",
            "model_type": "llm",
            "endpoint": "http://ollama-host:11434/v1/chat/completions",
            "api_key_name": None,  # No API key needed for local Ollama
            "specifications": {
                "context_window": 131072,
                "max_tokens": 4096,
            },
            "capabilities": {
                "streaming": True,
                "function_calling": False
            },
            "cost": {
                "per_1k_input": 0.0,
                "per_1k_output": 0.0
            },
            "description": "Local Ollama instance for DGX and x86 Linux deployments. Uses ollama-host DNS resolution.",
            "is_active": True
        },
        {
            "model_id": "ollama-local-macos",
            "name": "Local Ollama (MacOS)",
            "version": "1.0",
            "provider": "ollama",
            "model_type": "llm",
            "endpoint": "http://host.docker.internal:11434/v1/chat/completions",
            "api_key_name": None,  # No API key needed for local Ollama
            "specifications": {
                "context_window": 131072,
                "max_tokens": 4096,
            },
            "capabilities": {
                "streaming": True,
                "function_calling": False
            },
            "cost": {
                "per_1k_input": 0.0,
                "per_1k_output": 0.0
            },
            "description": "Local Ollama instance for macOS deployments. Uses host.docker.internal for Docker-to-host networking.",
            "is_active": True
        }
    ]
    # TTS Models (placeholder - will be added when available)
    tts_models = [
        # Future TTS models from Groq/PlayAI
    ]

    # Combine all models
    all_models = groq_llm_models + groq_audio_models + external_models + ollama_models + tts_models
    return all_models

def get_groq_models() -> List[Dict[str, Any]]:
    """Get only Groq models"""
    return [model for model in get_default_models() if model["provider"] == "groq"]


def get_external_models() -> List[Dict[str, Any]]:
    """Get only external models (BGE-M3, etc.)"""
    return [model for model in get_default_models() if model["provider"] == "external"]


def get_ollama_models() -> List[Dict[str, Any]]:
    """Get only Ollama models (local inference)"""
    return [model for model in get_default_models() if model["provider"] == "ollama"]


def get_models_by_type(model_type: str) -> List[Dict[str, Any]]:
    """Get models by type (llm, embedding, audio, tts)"""
    return [model for model in get_default_models() if model["model_type"] == model_type]
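

# Minimal usage sketch: summarize the default catalog with the helpers above.
# Illustrative only; in the control-panel backend this module is imported
# rather than run directly.
if __name__ == "__main__":
    models = get_default_models()
    print(f"Total default models: {len(models)}")
    # Counts per provider, using the same provider keys defined above
    for provider in ("groq", "external", "ollama"):
        count = sum(1 for m in models if m["provider"] == provider)
        print(f"  provider={provider}: {count}")
    # Counts per model type, via the type filter helper
    for model_type in ("llm", "audio", "embedding", "tts"):
        names = [m["name"] for m in get_models_by_type(model_type)]
        print(f"  type={model_type}: {len(names)}")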