GT AI OS Community v2.0.33 - Add NVIDIA NIM and Nemotron agents
- Updated python_coding_microproject.csv to use NVIDIA NIM Kimi K2 - Updated kali_linux_shell_simulator.csv to use NVIDIA NIM Kimi K2 - Made more general-purpose (flexible targets, expanded tools) - Added nemotron-mini-agent.csv for fast local inference via Ollama - Added nemotron-agent.csv for advanced reasoning via Ollama - Added wiki page: Projects for NVIDIA NIMs and Nemotron
This commit is contained in:
293
apps/resource-cluster/app/core/config.py
Normal file
293
apps/resource-cluster/app/core/config.py
Normal file
@@ -0,0 +1,293 @@
|
||||
"""
|
||||
GT 2.0 Resource Cluster Configuration
|
||||
|
||||
Central configuration for the air-gapped Resource Cluster that manages
|
||||
all AI resources, document processing, and external service integrations.
|
||||
"""
|
||||
|
||||
import os
from typing import List, Dict, Any, Optional

from pydantic import Field, field_validator, validator
from pydantic_settings import BaseSettings
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Resource Cluster settings with environment variable support.

    All fields can be overridden via environment variables (case-insensitive)
    or a local ``.env`` file; see ``model_config`` at the bottom of the class.
    """

    # Environment
    environment: str = Field(default="development", description="Runtime environment")
    debug: bool = Field(default=False, description="Debug mode")

    # Service Identity
    cluster_name: str = Field(default="gt-resource-cluster", description="Cluster identifier")
    service_port: int = Field(default=8003, description="Service port")

    # Security — secret_key has no default, so startup fails fast if it is unset.
    secret_key: str = Field(..., description="JWT signing key for capability tokens")
    algorithm: str = Field(default="HS256", description="JWT algorithm")
    capability_token_expire_minutes: int = Field(default=60, description="Capability token expiry")

    # External LLM Providers (via HAProxy)
    groq_api_key: Optional[str] = Field(default=None, description="Groq Cloud API key")
    groq_endpoints: List[str] = Field(
        default=["https://api.groq.com/openai/v1"],
        description="Groq API endpoints for load balancing"
    )
    openai_api_key: Optional[str] = Field(default=None, description="OpenAI API key")
    anthropic_api_key: Optional[str] = Field(default=None, description="Anthropic API key")

    # NVIDIA NIM Configuration
    nvidia_nim_endpoint: str = Field(
        default="https://integrate.api.nvidia.com/v1",
        description="NVIDIA NIM API endpoint (cloud or self-hosted)"
    )
    nvidia_nim_enabled: bool = Field(
        default=True,
        description="Enable NVIDIA NIM backend for GPU-accelerated inference"
    )

    # HAProxy Configuration
    haproxy_groq_endpoint: str = Field(
        default="http://haproxy-groq-lb-service.gt-resource.svc.cluster.local",
        description="HAProxy load balancer endpoint for Groq API"
    )
    haproxy_stats_endpoint: str = Field(
        default="http://haproxy-groq-lb-service.gt-resource.svc.cluster.local:8404/stats",
        description="HAProxy statistics endpoint"
    )
    haproxy_admin_socket: str = Field(
        default="/var/run/haproxy.sock",
        description="HAProxy admin socket for runtime configuration"
    )
    haproxy_enabled: bool = Field(
        default=True,
        description="Enable HAProxy load balancing for external APIs"
    )

    # Control Panel Integration (for API key retrieval)
    control_panel_url: str = Field(
        default="http://control-panel-backend:8000",
        description="Control Panel internal API URL for service-to-service calls"
    )
    # NOTE(review): default token is a placeholder — must be overridden outside
    # local development; confirm deployment sets SERVICE_AUTH_TOKEN.
    service_auth_token: str = Field(
        default="internal-service-token",
        description="Service-to-service authentication token"
    )

    # Admin Cluster Configuration Sync
    admin_cluster_url: str = Field(
        default="http://localhost:8001",
        description="Admin cluster URL for configuration sync"
    )
    config_sync_interval: int = Field(
        default=10,
        description="Configuration sync interval in seconds"
    )
    config_sync_enabled: bool = Field(
        default=True,
        description="Enable automatic configuration sync from admin cluster"
    )

    # Consul Service Discovery
    consul_host: str = Field(default="localhost", description="Consul host")
    consul_port: int = Field(default=8500, description="Consul port")
    consul_token: Optional[str] = Field(default=None, description="Consul ACL token")

    # Document Processing
    chunking_engine_workers: int = Field(default=4, description="Parallel document processors")
    max_document_size_mb: int = Field(default=50, description="Maximum document size")
    supported_document_types: List[str] = Field(
        default=[".pdf", ".docx", ".txt", ".md", ".html", ".pptx", ".xlsx", ".csv"],
        description="Supported document formats"
    )

    # BGE-M3 Embedding Configuration
    embedding_endpoint: str = Field(
        default="http://gentwo-vllm-embeddings:8000/v1/embeddings",
        description="Default embedding endpoint (local or external)"
    )
    bge_m3_local_mode: bool = Field(
        default=True,
        description="Use local BGE-M3 embedding service (True) or external endpoint (False)"
    )
    bge_m3_external_endpoint: Optional[str] = Field(
        default=None,
        description="External BGE-M3 embedding endpoint URL (when local_mode=False)"
    )

    # Vector Database (ChromaDB)
    chromadb_host: str = Field(default="localhost", description="ChromaDB host")
    chromadb_port: int = Field(default=8000, description="ChromaDB port")
    chromadb_encryption_key: Optional[str] = Field(
        default=None,
        description="Encryption key for vector storage"
    )

    # Resource Limits
    max_concurrent_inferences: int = Field(default=100, description="Max concurrent LLM calls")
    max_tokens_per_request: int = Field(default=8000, description="Max tokens per LLM request")
    rate_limit_requests_per_minute: int = Field(default=60, description="Global rate limit")

    # Storage Paths — defaults are chosen at import time from ENVIRONMENT so
    # non-production runs write under /tmp instead of the persistent volume.
    data_directory: str = Field(
        default="/tmp/gt2-resource-cluster" if os.getenv("ENVIRONMENT") != "production" else "/data/resource-cluster",
        description="Base data directory"
    )
    template_library_path: str = Field(
        default="/tmp/gt2-resource-cluster/templates" if os.getenv("ENVIRONMENT") != "production" else "/data/resource-cluster/templates",
        description="Agent template library"
    )
    models_cache_path: str = Field(  # Renamed to avoid pydantic warning
        default="/tmp/gt2-resource-cluster/models" if os.getenv("ENVIRONMENT") != "production" else "/data/resource-cluster/models",
        description="Local model cache"
    )

    # Redis removed - Resource Cluster uses PostgreSQL for caching and rate limiting

    # Monitoring
    prometheus_enabled: bool = Field(default=True, description="Enable Prometheus metrics")
    prometheus_port: int = Field(default=9091, description="Prometheus metrics port")

    # CORS Configuration (for tenant backends)
    cors_origins: List[str] = Field(
        default=["http://localhost:8002", "https://*.gt2.com"],
        description="Allowed CORS origins"
    )

    # Trusted Host Configuration
    # NOTE(review): the trailing "*" wildcard matches every Host header and
    # effectively disables TrustedHostMiddleware — confirm whether it can be
    # removed for production deployments.
    trusted_hosts: List[str] = Field(
        default=["localhost", "*.gt2.com", "resource-cluster", "gentwo-resource-backend",
                 "gt2-resource-backend", "testserver", "127.0.0.1", "*"],
        description="Allowed host headers for TrustedHostMiddleware"
    )

    # Feature Flags
    enable_model_caching: bool = Field(default=True, description="Cache model responses")
    enable_usage_tracking: bool = Field(default=True, description="Track resource usage")
    enable_cost_calculation: bool = Field(default=True, description="Calculate usage costs")

    # Single pydantic-v2 validator replaces three identical v1 ``@validator``
    # methods (the class already uses v2 conventions via ``model_config``).
    @field_validator("data_directory", "template_library_path", "models_cache_path")
    @classmethod
    def _ensure_directory_exists(cls, v: str) -> str:
        """Create the configured directory with owner-only permissions.

        ``mode`` applies only to newly created leaf directories and is
        further masked by the process umask.
        """
        os.makedirs(v, exist_ok=True, mode=0o700)
        return v

    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
        "case_sensitive": False,
        "extra": "ignore",
    }
|
||||
|
||||
|
||||
def get_settings(tenant_id: Optional[str] = None) -> Settings:
    """Get tenant-scoped application settings.

    In development a plain ``Settings`` instance is returned without any
    tenant isolation. In production each call builds a fresh instance so
    tenant-specific overrides can be layered in later without sharing
    global state across tenants.
    """
    if os.getenv("ENVIRONMENT") == "development":
        # Development: no tenant isolation needed, just load from env/.env.
        return Settings()

    if tenant_id:
        # Production, tenant-scoped: a dedicated instance per tenant.
        # Tenant-specific configuration can be applied here when required.
        tenant_settings = Settings()
        return tenant_settings

    # Production, non-tenant operations fall back to the default settings.
    return Settings()
|
||||
|
||||
|
||||
def get_resource_families(tenant_id: Optional[str] = None) -> Dict[str, Any]:
    """Get tenant-scoped resource family definitions (from CLAUDE.md).

    Currently returns a static catalog; the ``tenant_id`` parameter is a
    hook for per-tenant extension in production.
    """
    families: Dict[str, Any] = {}

    families["ai_ml"] = {
        "name": "AI/ML Resources",
        "subtypes": ["llm", "embedding", "image_generation", "function_calling"],
    }
    families["rag_engine"] = {
        "name": "RAG Engine Resources",
        "subtypes": ["vector_db", "document_processor", "semantic_search", "retrieval"],
    }
    families["agentic_workflow"] = {
        "name": "Agentic Workflow Resources",
        "subtypes": ["single_agent", "multi_agent", "orchestration", "memory"],
    }
    families["app_integration"] = {
        "name": "App Integration Resources",
        "subtypes": ["oauth2", "webhook", "api_connector", "database_connector"],
    }
    families["external_service"] = {
        "name": "External Web Services",
        "subtypes": ["iframe_embed", "sso_service", "remote_desktop", "learning_platform"],
    }
    families["ai_literacy"] = {
        "name": "AI Literacy & Cognitive Skills",
        "subtypes": ["strategic_game", "logic_puzzle", "philosophical_dilemma", "educational_content"],
    }

    return families
|
||||
|
||||
def get_model_configs(tenant_id: Optional[str] = None) -> Dict[str, Any]:
    """Get tenant-scoped model configurations for different providers.

    Returns a provider -> model -> capability-record mapping. Currently a
    static catalog; ``tenant_id`` is a hook for per-tenant customization.
    """

    def spec(max_tokens: int, cost: float, streaming: bool, functions: bool) -> Dict[str, Any]:
        # Uniform per-model capability record shared by every provider entry.
        return {
            "max_tokens": max_tokens,
            "cost_per_1k_tokens": cost,
            "supports_streaming": streaming,
            "supports_function_calling": functions,
        }

    return {
        "groq": {
            "llama-3.1-70b-versatile": spec(8000, 0.59, True, True),
            "llama-3.1-8b-instant": spec(8000, 0.05, True, True),
            "mixtral-8x7b-32768": spec(32768, 0.27, True, False),
        },
        "openai": {
            "gpt-4-turbo": spec(128000, 10.0, True, True),
            "gpt-3.5-turbo": spec(16385, 0.5, True, True),
        },
        "anthropic": {
            "claude-3-opus": spec(200000, 15.0, True, False),
            "claude-3-sonnet": spec(200000, 3.0, True, False),
        },
    }
|
||||
Reference in New Issue
Block a user