""" GT 2.0 Resource Cluster Configuration Central configuration for the air-gapped Resource Cluster that manages all AI resources, document processing, and external service integrations. """ import os from typing import List, Dict, Any, Optional from pydantic_settings import BaseSettings from pydantic import Field, validator class Settings(BaseSettings): """Resource Cluster settings with environment variable support""" # Environment environment: str = Field(default="development", description="Runtime environment") debug: bool = Field(default=False, description="Debug mode") # Service Identity cluster_name: str = Field(default="gt-resource-cluster", description="Cluster identifier") service_port: int = Field(default=8003, description="Service port") # Security secret_key: str = Field(..., description="JWT signing key for capability tokens") algorithm: str = Field(default="HS256", description="JWT algorithm") capability_token_expire_minutes: int = Field(default=60, description="Capability token expiry") # External LLM Providers (via HAProxy) groq_api_key: Optional[str] = Field(default=None, description="Groq Cloud API key") groq_endpoints: List[str] = Field( default=["https://api.groq.com/openai/v1"], description="Groq API endpoints for load balancing" ) openai_api_key: Optional[str] = Field(default=None, description="OpenAI API key") anthropic_api_key: Optional[str] = Field(default=None, description="Anthropic API key") # NVIDIA NIM Configuration nvidia_nim_endpoint: str = Field( default="https://integrate.api.nvidia.com/v1", description="NVIDIA NIM API endpoint (cloud or self-hosted)" ) nvidia_nim_enabled: bool = Field( default=True, description="Enable NVIDIA NIM backend for GPU-accelerated inference" ) # HAProxy Configuration haproxy_groq_endpoint: str = Field( default="http://haproxy-groq-lb-service.gt-resource.svc.cluster.local", description="HAProxy load balancer endpoint for Groq API" ) haproxy_stats_endpoint: str = Field( default="http://haproxy-groq-lb-service.gt-resource.svc.cluster.local:8404/stats", description="HAProxy statistics endpoint" ) haproxy_admin_socket: str = Field( default="/var/run/haproxy.sock", description="HAProxy admin socket for runtime configuration" ) haproxy_enabled: bool = Field( default=True, description="Enable HAProxy load balancing for external APIs" ) # Control Panel Integration (for API key retrieval) control_panel_url: str = Field( default="http://control-panel-backend:8000", description="Control Panel internal API URL for service-to-service calls" ) service_auth_token: str = Field( default="internal-service-token", description="Service-to-service authentication token" ) # Admin Cluster Configuration Sync admin_cluster_url: str = Field( default="http://localhost:8001", description="Admin cluster URL for configuration sync" ) config_sync_interval: int = Field( default=10, description="Configuration sync interval in seconds" ) config_sync_enabled: bool = Field( default=True, description="Enable automatic configuration sync from admin cluster" ) # Consul Service Discovery consul_host: str = Field(default="localhost", description="Consul host") consul_port: int = Field(default=8500, description="Consul port") consul_token: Optional[str] = Field(default=None, description="Consul ACL token") # Document Processing chunking_engine_workers: int = Field(default=4, description="Parallel document processors") max_document_size_mb: int = Field(default=50, description="Maximum document size") supported_document_types: List[str] = Field( default=[".pdf", ".docx", 
".txt", ".md", ".html", ".pptx", ".xlsx", ".csv"], description="Supported document formats" ) # BGE-M3 Embedding Configuration embedding_endpoint: str = Field( default="http://gentwo-vllm-embeddings:8000/v1/embeddings", description="Default embedding endpoint (local or external)" ) bge_m3_local_mode: bool = Field( default=True, description="Use local BGE-M3 embedding service (True) or external endpoint (False)" ) bge_m3_external_endpoint: Optional[str] = Field( default=None, description="External BGE-M3 embedding endpoint URL (when local_mode=False)" ) # Vector Database (ChromaDB) chromadb_host: str = Field(default="localhost", description="ChromaDB host") chromadb_port: int = Field(default=8000, description="ChromaDB port") chromadb_encryption_key: Optional[str] = Field( default=None, description="Encryption key for vector storage" ) # Resource Limits max_concurrent_inferences: int = Field(default=100, description="Max concurrent LLM calls") max_tokens_per_request: int = Field(default=8000, description="Max tokens per LLM request") rate_limit_requests_per_minute: int = Field(default=60, description="Global rate limit") # Storage Paths data_directory: str = Field( default="/tmp/gt2-resource-cluster" if os.getenv("ENVIRONMENT") != "production" else "/data/resource-cluster", description="Base data directory" ) template_library_path: str = Field( default="/tmp/gt2-resource-cluster/templates" if os.getenv("ENVIRONMENT") != "production" else "/data/resource-cluster/templates", description="Agent template library" ) models_cache_path: str = Field( # Renamed to avoid pydantic warning default="/tmp/gt2-resource-cluster/models" if os.getenv("ENVIRONMENT") != "production" else "/data/resource-cluster/models", description="Local model cache" ) # Redis removed - Resource Cluster uses PostgreSQL for caching and rate limiting # Monitoring prometheus_enabled: bool = Field(default=True, description="Enable Prometheus metrics") prometheus_port: int = Field(default=9091, description="Prometheus metrics port") # CORS Configuration (for tenant backends) cors_origins: List[str] = Field( default=["http://localhost:8002", "https://*.gt2.com"], description="Allowed CORS origins" ) # Trusted Host Configuration trusted_hosts: List[str] = Field( default=["localhost", "*.gt2.com", "resource-cluster", "gentwo-resource-backend", "gt2-resource-backend", "testserver", "127.0.0.1", "*"], description="Allowed host headers for TrustedHostMiddleware" ) # Feature Flags enable_model_caching: bool = Field(default=True, description="Cache model responses") enable_usage_tracking: bool = Field(default=True, description="Track resource usage") enable_cost_calculation: bool = Field(default=True, description="Calculate usage costs") @validator("data_directory") def validate_data_directory(cls, v): # Ensure directory exists with secure permissions os.makedirs(v, exist_ok=True, mode=0o700) return v @validator("template_library_path") def validate_template_library_path(cls, v): os.makedirs(v, exist_ok=True, mode=0o700) return v @validator("models_cache_path") def validate_models_cache_path(cls, v): os.makedirs(v, exist_ok=True, mode=0o700) return v model_config = { "env_file": ".env", "env_file_encoding": "utf-8", "case_sensitive": False, "extra": "ignore", } def get_settings(tenant_id: Optional[str] = None) -> Settings: """Get tenant-scoped application settings""" # For development, use a simple cache without tenant isolation if os.getenv("ENVIRONMENT") == "development": return Settings() # In production, settings should be 
def get_settings(tenant_id: Optional[str] = None) -> Settings:
    """Get tenant-scoped application settings"""
    # For development, return unscoped settings without tenant isolation
    if os.getenv("ENVIRONMENT") == "development":
        return Settings()

    # In production, settings should be tenant-scoped; this prevents global
    # state from affecting tenant isolation
    if tenant_id:
        # Create tenant-specific settings with proper isolation
        settings = Settings()
        # Add tenant-specific configurations here if needed
        return settings
    else:
        # Default settings for non-tenant operations
        return Settings()


def get_resource_families(tenant_id: Optional[str] = None) -> Dict[str, Any]:
    """Get tenant-scoped resource family definitions (from CLAUDE.md)"""
    # Base resource families - can be extended per tenant in production
    return {
        "ai_ml": {
            "name": "AI/ML Resources",
            "subtypes": ["llm", "embedding", "image_generation", "function_calling"]
        },
        "rag_engine": {
            "name": "RAG Engine Resources",
            "subtypes": ["vector_db", "document_processor", "semantic_search", "retrieval"]
        },
        "agentic_workflow": {
            "name": "Agentic Workflow Resources",
            "subtypes": ["single_agent", "multi_agent", "orchestration", "memory"]
        },
        "app_integration": {
            "name": "App Integration Resources",
            "subtypes": ["oauth2", "webhook", "api_connector", "database_connector"]
        },
        "external_service": {
            "name": "External Web Services",
            "subtypes": ["iframe_embed", "sso_service", "remote_desktop", "learning_platform"]
        },
        "ai_literacy": {
            "name": "AI Literacy & Cognitive Skills",
            "subtypes": ["strategic_game", "logic_puzzle", "philosophical_dilemma", "educational_content"]
        }
    }


def get_model_configs(tenant_id: Optional[str] = None) -> Dict[str, Any]:
    """Get tenant-scoped model configurations for different providers"""
    # Base model configurations - can be customized per tenant in production
    return {
        "groq": {
            "llama-3.1-70b-versatile": {
                "max_tokens": 8000,
                "cost_per_1k_tokens": 0.59,
                "supports_streaming": True,
                "supports_function_calling": True
            },
            "llama-3.1-8b-instant": {
                "max_tokens": 8000,
                "cost_per_1k_tokens": 0.05,
                "supports_streaming": True,
                "supports_function_calling": True
            },
            "mixtral-8x7b-32768": {
                "max_tokens": 32768,
                "cost_per_1k_tokens": 0.27,
                "supports_streaming": True,
                "supports_function_calling": False
            }
        },
        "openai": {
            "gpt-4-turbo": {
                "max_tokens": 128000,
                "cost_per_1k_tokens": 10.0,
                "supports_streaming": True,
                "supports_function_calling": True
            },
            "gpt-3.5-turbo": {
                "max_tokens": 16385,
                "cost_per_1k_tokens": 0.5,
                "supports_streaming": True,
                "supports_function_calling": True
            }
        },
        "anthropic": {
            "claude-3-opus": {
                "max_tokens": 200000,
                "cost_per_1k_tokens": 15.0,
                "supports_streaming": True,
                "supports_function_calling": False
            },
            "claude-3-sonnet": {
                "max_tokens": 200000,
                "cost_per_1k_tokens": 3.0,
                "supports_streaming": True,
                "supports_function_calling": False
            }
        }
    }
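

# Illustrative helper (hypothetical name, not part of the cluster API): a minimal
# sketch of how the static metadata from get_model_configs() could back the
# enable_cost_calculation feature flag, reading "cost_per_1k_tokens" as USD per
# 1,000 tokens as the key suggests.
def estimate_request_cost(provider: str, model: str, tokens_used: int) -> float:
    """Estimate the cost of a single request from the static model metadata."""
    config = get_model_configs()[provider][model]
    return (tokens_used / 1000) * config["cost_per_1k_tokens"]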
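

if __name__ == "__main__":
    # Smoke-test sketch, assuming this module is run directly. SECRET_KEY is
    # the one required variable; the value here is a hypothetical dev-only key.
    os.environ.setdefault("SECRET_KEY", "dev-only-signing-key")

    settings = get_settings()
    print(f"cluster={settings.cluster_name} port={settings.service_port}")

    # Resource families: family key -> display name and subtype list
    for family, meta in get_resource_families().items():
        print(f"{family}: {meta['name']} ({len(meta['subtypes'])} subtypes)")

    # Model configs: provider -> model -> capability/cost metadata
    print(f"groq models: {sorted(get_model_configs()['groq'])}")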