GT AI OS Community v2.0.33 - Add NVIDIA NIM and Nemotron agents
- Updated python_coding_microproject.csv to use NVIDIA NIM Kimi K2 - Updated kali_linux_shell_simulator.csv to use NVIDIA NIM Kimi K2 - Made more general-purpose (flexible targets, expanded tools) - Added nemotron-mini-agent.csv for fast local inference via Ollama - Added nemotron-agent.csv for advanced reasoning via Ollama - Added wiki page: Projects for NVIDIA NIMs and Nemotron
This commit is contained in:
293
apps/resource-cluster/app/core/config.py
Normal file
293
apps/resource-cluster/app/core/config.py
Normal file
@@ -0,0 +1,293 @@
|
||||
"""
|
||||
GT 2.0 Resource Cluster Configuration
|
||||
|
||||
Central configuration for the air-gapped Resource Cluster that manages
|
||||
all AI resources, document processing, and external service integrations.
|
||||
"""
|
||||
|
||||
import os
from typing import List, Dict, Any, Optional

from pydantic import Field, field_validator, validator
from pydantic_settings import BaseSettings
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Resource Cluster settings with environment variable support.

    All fields can be overridden via environment variables (case-insensitive)
    or a local ``.env`` file; see ``model_config`` at the bottom of the class.
    """

    # Environment
    environment: str = Field(default="development", description="Runtime environment")
    debug: bool = Field(default=False, description="Debug mode")

    # Service Identity
    cluster_name: str = Field(default="gt-resource-cluster", description="Cluster identifier")
    service_port: int = Field(default=8003, description="Service port")

    # Security — secret_key has no default, so startup fails fast if it is unset.
    secret_key: str = Field(..., description="JWT signing key for capability tokens")
    algorithm: str = Field(default="HS256", description="JWT algorithm")
    capability_token_expire_minutes: int = Field(default=60, description="Capability token expiry")

    # External LLM Providers (via HAProxy)
    groq_api_key: Optional[str] = Field(default=None, description="Groq Cloud API key")
    groq_endpoints: List[str] = Field(
        default=["https://api.groq.com/openai/v1"],
        description="Groq API endpoints for load balancing"
    )
    openai_api_key: Optional[str] = Field(default=None, description="OpenAI API key")
    anthropic_api_key: Optional[str] = Field(default=None, description="Anthropic API key")

    # NVIDIA NIM Configuration
    nvidia_nim_endpoint: str = Field(
        default="https://integrate.api.nvidia.com/v1",
        description="NVIDIA NIM API endpoint (cloud or self-hosted)"
    )
    nvidia_nim_enabled: bool = Field(
        default=True,
        description="Enable NVIDIA NIM backend for GPU-accelerated inference"
    )

    # HAProxy Configuration
    haproxy_groq_endpoint: str = Field(
        default="http://haproxy-groq-lb-service.gt-resource.svc.cluster.local",
        description="HAProxy load balancer endpoint for Groq API"
    )
    haproxy_stats_endpoint: str = Field(
        default="http://haproxy-groq-lb-service.gt-resource.svc.cluster.local:8404/stats",
        description="HAProxy statistics endpoint"
    )
    haproxy_admin_socket: str = Field(
        default="/var/run/haproxy.sock",
        description="HAProxy admin socket for runtime configuration"
    )
    haproxy_enabled: bool = Field(
        default=True,
        description="Enable HAProxy load balancing for external APIs"
    )

    # Control Panel Integration (for API key retrieval)
    control_panel_url: str = Field(
        default="http://control-panel-backend:8000",
        description="Control Panel internal API URL for service-to-service calls"
    )
    # NOTE(review): default token is a placeholder — must be overridden outside
    # local development; confirm deployment sets SERVICE_AUTH_TOKEN.
    service_auth_token: str = Field(
        default="internal-service-token",
        description="Service-to-service authentication token"
    )

    # Admin Cluster Configuration Sync
    admin_cluster_url: str = Field(
        default="http://localhost:8001",
        description="Admin cluster URL for configuration sync"
    )
    config_sync_interval: int = Field(
        default=10,
        description="Configuration sync interval in seconds"
    )
    config_sync_enabled: bool = Field(
        default=True,
        description="Enable automatic configuration sync from admin cluster"
    )

    # Consul Service Discovery
    consul_host: str = Field(default="localhost", description="Consul host")
    consul_port: int = Field(default=8500, description="Consul port")
    consul_token: Optional[str] = Field(default=None, description="Consul ACL token")

    # Document Processing
    chunking_engine_workers: int = Field(default=4, description="Parallel document processors")
    max_document_size_mb: int = Field(default=50, description="Maximum document size")
    supported_document_types: List[str] = Field(
        default=[".pdf", ".docx", ".txt", ".md", ".html", ".pptx", ".xlsx", ".csv"],
        description="Supported document formats"
    )

    # BGE-M3 Embedding Configuration
    embedding_endpoint: str = Field(
        default="http://gentwo-vllm-embeddings:8000/v1/embeddings",
        description="Default embedding endpoint (local or external)"
    )
    bge_m3_local_mode: bool = Field(
        default=True,
        description="Use local BGE-M3 embedding service (True) or external endpoint (False)"
    )
    bge_m3_external_endpoint: Optional[str] = Field(
        default=None,
        description="External BGE-M3 embedding endpoint URL (when local_mode=False)"
    )

    # Vector Database (ChromaDB)
    chromadb_host: str = Field(default="localhost", description="ChromaDB host")
    chromadb_port: int = Field(default=8000, description="ChromaDB port")
    chromadb_encryption_key: Optional[str] = Field(
        default=None,
        description="Encryption key for vector storage"
    )

    # Resource Limits
    max_concurrent_inferences: int = Field(default=100, description="Max concurrent LLM calls")
    max_tokens_per_request: int = Field(default=8000, description="Max tokens per LLM request")
    rate_limit_requests_per_minute: int = Field(default=60, description="Global rate limit")

    # Storage Paths — defaults are chosen at import time from ENVIRONMENT so
    # non-production runs write under /tmp instead of the persistent volume.
    data_directory: str = Field(
        default="/tmp/gt2-resource-cluster" if os.getenv("ENVIRONMENT") != "production" else "/data/resource-cluster",
        description="Base data directory"
    )
    template_library_path: str = Field(
        default="/tmp/gt2-resource-cluster/templates" if os.getenv("ENVIRONMENT") != "production" else "/data/resource-cluster/templates",
        description="Agent template library"
    )
    models_cache_path: str = Field(  # Renamed to avoid pydantic warning
        default="/tmp/gt2-resource-cluster/models" if os.getenv("ENVIRONMENT") != "production" else "/data/resource-cluster/models",
        description="Local model cache"
    )

    # Redis removed - Resource Cluster uses PostgreSQL for caching and rate limiting

    # Monitoring
    prometheus_enabled: bool = Field(default=True, description="Enable Prometheus metrics")
    prometheus_port: int = Field(default=9091, description="Prometheus metrics port")

    # CORS Configuration (for tenant backends)
    cors_origins: List[str] = Field(
        default=["http://localhost:8002", "https://*.gt2.com"],
        description="Allowed CORS origins"
    )

    # Trusted Host Configuration
    # NOTE(review): the trailing "*" wildcard matches every Host header and
    # effectively disables TrustedHostMiddleware — confirm whether it can be
    # removed for production deployments.
    trusted_hosts: List[str] = Field(
        default=["localhost", "*.gt2.com", "resource-cluster", "gentwo-resource-backend",
                 "gt2-resource-backend", "testserver", "127.0.0.1", "*"],
        description="Allowed host headers for TrustedHostMiddleware"
    )

    # Feature Flags
    enable_model_caching: bool = Field(default=True, description="Cache model responses")
    enable_usage_tracking: bool = Field(default=True, description="Track resource usage")
    enable_cost_calculation: bool = Field(default=True, description="Calculate usage costs")

    # Single pydantic-v2 validator replaces three identical v1 ``@validator``
    # methods (the class already uses v2 conventions via ``model_config``).
    @field_validator("data_directory", "template_library_path", "models_cache_path")
    @classmethod
    def _ensure_directory_exists(cls, v: str) -> str:
        """Create the configured directory with owner-only permissions.

        ``mode`` applies only to newly created leaf directories and is
        further masked by the process umask.
        """
        os.makedirs(v, exist_ok=True, mode=0o700)
        return v

    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
        "case_sensitive": False,
        "extra": "ignore",
    }
|
||||
|
||||
|
||||
def get_settings(tenant_id: Optional[str] = None) -> Settings:
    """Get tenant-scoped application settings.

    In development a plain ``Settings`` instance is returned without any
    tenant isolation. In production each call builds a fresh instance so
    tenant-specific overrides can be layered in later without sharing
    global state across tenants.
    """
    if os.getenv("ENVIRONMENT") == "development":
        # Development: no tenant isolation needed, just load from env/.env.
        return Settings()

    if tenant_id:
        # Production, tenant-scoped: a dedicated instance per tenant.
        # Tenant-specific configuration can be applied here when required.
        tenant_settings = Settings()
        return tenant_settings

    # Production, non-tenant operations fall back to the default settings.
    return Settings()
|
||||
|
||||
|
||||
def get_resource_families(tenant_id: Optional[str] = None) -> Dict[str, Any]:
    """Get tenant-scoped resource family definitions (from CLAUDE.md).

    Currently returns a static catalog; the ``tenant_id`` parameter is a
    hook for per-tenant extension in production.
    """
    families: Dict[str, Any] = {}

    families["ai_ml"] = {
        "name": "AI/ML Resources",
        "subtypes": ["llm", "embedding", "image_generation", "function_calling"],
    }
    families["rag_engine"] = {
        "name": "RAG Engine Resources",
        "subtypes": ["vector_db", "document_processor", "semantic_search", "retrieval"],
    }
    families["agentic_workflow"] = {
        "name": "Agentic Workflow Resources",
        "subtypes": ["single_agent", "multi_agent", "orchestration", "memory"],
    }
    families["app_integration"] = {
        "name": "App Integration Resources",
        "subtypes": ["oauth2", "webhook", "api_connector", "database_connector"],
    }
    families["external_service"] = {
        "name": "External Web Services",
        "subtypes": ["iframe_embed", "sso_service", "remote_desktop", "learning_platform"],
    }
    families["ai_literacy"] = {
        "name": "AI Literacy & Cognitive Skills",
        "subtypes": ["strategic_game", "logic_puzzle", "philosophical_dilemma", "educational_content"],
    }

    return families
|
||||
|
||||
def get_model_configs(tenant_id: Optional[str] = None) -> Dict[str, Any]:
    """Get tenant-scoped model configurations for different providers.

    Returns a provider -> model -> capability-record mapping. Currently a
    static catalog; ``tenant_id`` is a hook for per-tenant customization.
    """

    def spec(max_tokens: int, cost: float, streaming: bool, functions: bool) -> Dict[str, Any]:
        # Uniform per-model capability record shared by every provider entry.
        return {
            "max_tokens": max_tokens,
            "cost_per_1k_tokens": cost,
            "supports_streaming": streaming,
            "supports_function_calling": functions,
        }

    return {
        "groq": {
            "llama-3.1-70b-versatile": spec(8000, 0.59, True, True),
            "llama-3.1-8b-instant": spec(8000, 0.05, True, True),
            "mixtral-8x7b-32768": spec(32768, 0.27, True, False),
        },
        "openai": {
            "gpt-4-turbo": spec(128000, 10.0, True, True),
            "gpt-3.5-turbo": spec(16385, 0.5, True, True),
        },
        "anthropic": {
            "claude-3-opus": spec(200000, 15.0, True, False),
            "claude-3-sonnet": spec(200000, 3.0, True, False),
        },
    }
|
||||
Reference in New Issue
Block a user