GT AI OS Community v2.0.33 - Add NVIDIA NIM and Nemotron agents

- Updated python_coding_microproject.csv to use NVIDIA NIM Kimi K2
- Updated kali_linux_shell_simulator.csv to use NVIDIA NIM Kimi K2
- Made more general-purpose (flexible targets, expanded tools)
- Added nemotron-mini-agent.csv for fast local inference via Ollama
- Added nemotron-agent.csv for advanced reasoning via Ollama
- Added wiki page: Projects for NVIDIA NIMs and Nemotron
2025-12-12 17:47:14 -05:00
commit 310491a557
750 changed files with 232701 additions and 0 deletions
--- a/apps/resource-cluster/app/services/admin_model_config_service.py
+++ b/apps/resource-cluster/app/services/admin_model_config_service.py
@@ -0,0 +1,342 @@
+"""
+Admin Model Configuration Service for GT 2.0 Resource Cluster
+
+This service fetches model configurations from the Admin Control Panel
+and provides them to the Resource Cluster for LLM routing and capabilities.
+"""
+
+import asyncio
+import logging
+import httpx
+from typing import Dict, Any, List, Optional
+from datetime import datetime, timedelta
+from dataclasses import dataclass
+import json
+
+from app.core.config import get_settings
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class AdminModelConfig:
+    """Model configuration from admin cluster"""
+    uuid: str  # Database UUID - unique identifier for this model config
+    model_id: str  # Business identifier - the model name used in API calls
+    name: str
+    provider: str
+    model_type: str
+    endpoint: str
+    api_key_name: Optional[str]
+    context_window: Optional[int]
+    max_tokens: Optional[int]
+    capabilities: Dict[str, Any]
+    cost_per_1k_input: float
+    cost_per_1k_output: float
+    is_active: bool
+    tenant_restrictions: Dict[str, Any]
+    required_capabilities: List[str]
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for LLM Gateway"""
+        return {
+            "uuid": self.uuid,
+            "model_id": self.model_id,
+            "name": self.name,
+            "provider": self.provider,
+            "model_type": self.model_type,
+            "endpoint": self.endpoint,
+            "api_key_name": self.api_key_name,
+            "context_window": self.context_window,
+            "max_tokens": self.max_tokens,
+            "capabilities": self.capabilities,
+            "cost_per_1k_input": self.cost_per_1k_input,
+            "cost_per_1k_output": self.cost_per_1k_output,
+            "is_active": self.is_active,
+            "tenant_restrictions": self.tenant_restrictions,
+            "required_capabilities": self.required_capabilities
+        }
+
+
+class AdminModelConfigService:
+    """Service for fetching model configurations from Admin Control Panel"""
+    
+    def __init__(self):
+        self.settings = get_settings()
+        self._model_cache: Dict[str, AdminModelConfig] = {}  # model_id -> config
+        self._uuid_cache: Dict[str, AdminModelConfig] = {}   # uuid -> config (for UUID-based lookups)
+        self._tenant_model_cache: Dict[str, List[str]] = {}  # tenant_id -> list of allowed model_ids
+        self._last_sync: datetime = datetime.min
+        self._sync_interval = timedelta(seconds=self.settings.config_sync_interval)
+        self._sync_lock = asyncio.Lock()
+
+    async def get_model_config(self, model_id: str) -> Optional[AdminModelConfig]:
+        """Get configuration for a specific model by model_id string"""
+        await self._ensure_fresh_cache()
+        return self._model_cache.get(model_id)
+
+    async def get_model_by_uuid(self, uuid: str) -> Optional[AdminModelConfig]:
+        """Get configuration for a specific model by database UUID"""
+        await self._ensure_fresh_cache()
+        return self._uuid_cache.get(uuid)
+    
+    async def get_all_models(self, active_only: bool = True) -> List[AdminModelConfig]:
+        """Get all model configurations"""
+        await self._ensure_fresh_cache()
+        models = list(self._model_cache.values())
+        if active_only:
+            models = [m for m in models if m.is_active]
+        return models
+    
+    async def get_tenant_models(self, tenant_id: str) -> List[AdminModelConfig]:
+        """Get models available to a specific tenant"""
+        await self._ensure_fresh_cache()
+        
+        # Get tenant's allowed model IDs - try multiple formats
+        allowed_model_ids = self._get_tenant_model_ids(tenant_id)
+        
+        # Return model configs for allowed models
+        models = []
+        for model_id in allowed_model_ids:
+            if model_id in self._model_cache and self._model_cache[model_id].is_active:
+                models.append(self._model_cache[model_id])
+        
+        return models
+    
+    async def check_tenant_access(self, tenant_id: str, model_id: str) -> bool:
+        """Check if a tenant has access to a specific model"""
+        await self._ensure_fresh_cache()
+        
+        # Check if model exists and is active
+        model_config = self._model_cache.get(model_id)
+        if not model_config or not model_config.is_active:
+            return False
+        
+        # Only use tenant-specific access (no global access)
+        # This enforces proper tenant model assignments
+        allowed_models = self._get_tenant_model_ids(tenant_id)
+        return model_id in allowed_models
+    
+    def _get_tenant_model_ids(self, tenant_id: str) -> List[str]:
+        """Get model IDs for tenant, handling multiple tenant ID formats"""
+        # Try exact match first (e.g., "test-company")
+        allowed_models = self._tenant_model_cache.get(tenant_id, [])
+        
+        if not allowed_models:
+            # Try converting "test-company" to "test" format
+            if "-" in tenant_id:
+                domain_format = tenant_id.split("-")[0]
+                allowed_models = self._tenant_model_cache.get(domain_format, [])
+            
+            # Try converting "test" to "test-company" format
+            elif tenant_id + "-company" in self._tenant_model_cache:
+                allowed_models = self._tenant_model_cache.get(tenant_id + "-company", [])
+            
+            # Also try tenant_id as numeric string
+            for key, models in self._tenant_model_cache.items():
+                if key.isdigit() and tenant_id in key:
+                    allowed_models.extend(models)
+                    break
+        
+        logger.debug(f"Tenant {tenant_id} has access to models: {allowed_models}")
+        return allowed_models
+    
+    async def get_groq_api_key(self, tenant_id: str = None) -> Optional[str]:
+        """
+        Get Groq API key for a tenant from Control Panel database.
+
+        NO environment variable fallback - per GT 2.0 NO FALLBACKS principle.
+        API keys are managed in Control Panel and fetched via internal API.
+
+        Args:
+            tenant_id: Tenant domain string (required for tenant requests)
+
+        Returns:
+            Decrypted Groq API key
+
+        Raises:
+            ValueError: If no API key configured for tenant
+        """
+        if not tenant_id:
+            raise ValueError("tenant_id is required to fetch Groq API key - no fallback to environment variables")
+
+        from app.clients.api_key_client import get_api_key_client, APIKeyNotConfiguredError
+
+        client = get_api_key_client()
+
+        try:
+            key_info = await client.get_api_key(tenant_domain=tenant_id, provider="groq")
+            return key_info["api_key"]
+        except APIKeyNotConfiguredError as e:
+            logger.error(f"No Groq API key configured for tenant '{tenant_id}': {e}")
+            raise ValueError(f"No Groq API key configured for tenant '{tenant_id}'. Please configure in Control Panel → API Keys.")
+        except RuntimeError as e:
+            logger.error(f"Control Panel API error when fetching API key: {e}")
+            raise ValueError(f"Unable to retrieve API key - Control Panel service unavailable: {e}")
+    
+    async def _ensure_fresh_cache(self):
+        """Ensure model cache is fresh, sync if needed"""
+        now = datetime.utcnow()
+        if now - self._last_sync > self._sync_interval:
+            async with self._sync_lock:
+                # Double-check after acquiring lock
+                now = datetime.utcnow()
+                if now - self._last_sync <= self._sync_interval:
+                    return
+                    
+                await self._sync_from_admin()
+    
+    async def _sync_from_admin(self):
+        """Sync model configurations from admin cluster"""
+        try:
+            # Use correct URL for containerized environment
+            import os
+            if os.path.exists('/.dockerenv'):
+                admin_url = "http://control-panel-backend:8000"
+            else:
+                admin_url = self.settings.admin_cluster_url.rstrip('/')
+            
+            async with httpx.AsyncClient(timeout=30.0) as client:
+                # Fetch all model configurations
+                models_response = await client.get(
+                    f"{admin_url}/api/v1/models/?active_only=true&include_stats=true"
+                )
+                
+                # Fetch tenant model assignments with proper authentication
+                tenant_models_response = await client.get(
+                    f"{admin_url}/api/v1/tenant-models/tenants/all",
+                    headers={
+                        "Authorization": "Bearer admin-dev-token",
+                        "Content-Type": "application/json"
+                    }
+                )
+                
+                if models_response.status_code == 200:
+                    models_data = models_response.json()
+                    if models_data and len(models_data) > 0:
+                        await self._update_model_cache(models_data)
+                        logger.info(f"Successfully synced {len(models_data)} models from admin cluster")
+                        
+                        # Update tenant model assignments if available
+                        if tenant_models_response.status_code == 200:
+                            tenant_data = tenant_models_response.json()
+                            if tenant_data and len(tenant_data) > 0:
+                                await self._update_tenant_cache(tenant_data)
+                                logger.info(f"Successfully synced {len(tenant_data)} tenant model assignments")
+                            else:
+                                logger.warning("No tenant model assignments found")
+                        else:
+                            logger.error(f"Failed to fetch tenant assignments: {tenant_models_response.status_code}")
+                            # Log the actual error for debugging
+                            try:
+                                error_response = tenant_models_response.json()
+                                logger.error(f"Tenant assignments error: {error_response}")
+                            except:
+                                logger.error(f"Tenant assignments error text: {tenant_models_response.text}")
+                        
+                        self._last_sync = datetime.utcnow()
+                        return
+                    else:
+                        logger.warning("Admin cluster returned empty model list")
+                else:
+                    logger.warning(f"Failed to fetch models from admin cluster: {models_response.status_code}")
+            
+            logger.info("No models configured in admin backend")
+            self._last_sync = datetime.utcnow()
+            logger.info(f"Loaded {len(self._model_cache)} models successfully")
+            
+        except Exception as e:
+            logger.error(f"Failed to sync from admin cluster: {e}")
+            
+            # Log final state - no fallback models
+            if not self._model_cache:
+                logger.warning("No models available - admin backend has no models configured")
+    
+    async def _update_model_cache(self, models_data: List[Dict[str, Any]]):
+        """Update model configuration cache"""
+        new_cache = {}
+        new_uuid_cache = {}
+
+        for model_data in models_data:
+            try:
+                specs = model_data.get("specifications", {})
+                cost = model_data.get("cost", {})
+                status = model_data.get("status", {})
+
+                # Get UUID from 'id' field in API response (Control Panel returns UUID as 'id')
+                model_uuid = model_data.get("id", "")
+
+                model_config = AdminModelConfig(
+                    uuid=model_uuid,
+                    model_id=model_data["model_id"],
+                    name=model_data.get("name", model_data["model_id"]),
+                    provider=model_data["provider"],
+                    model_type=model_data["model_type"],
+                    endpoint=model_data.get("endpoint", ""),
+                    api_key_name=model_data.get("api_key_name"),
+                    context_window=specs.get("context_window"),
+                    max_tokens=specs.get("max_tokens"),
+                    capabilities=model_data.get("capabilities", {}),
+                    cost_per_1k_input=cost.get("per_1k_input", 0.0),
+                    cost_per_1k_output=cost.get("per_1k_output", 0.0),
+                    is_active=status.get("is_active", False),
+                    tenant_restrictions=model_data.get("tenant_restrictions", {"global_access": True}),
+                    required_capabilities=model_data.get("required_capabilities", [])
+                )
+
+                new_cache[model_config.model_id] = model_config
+
+                # Also index by UUID for UUID-based lookups
+                if model_uuid:
+                    new_uuid_cache[model_uuid] = model_config
+
+            except Exception as e:
+                logger.error(f"Failed to parse model config {model_data.get('model_id', 'unknown')}: {e}")
+
+        self._model_cache = new_cache
+        self._uuid_cache = new_uuid_cache
+    
+    async def _update_tenant_cache(self, tenant_data: List[Dict[str, Any]]):
+        """Update tenant model access cache from tenant-models endpoint"""
+        new_tenant_cache = {}
+        
+        for assignment in tenant_data:
+            try:
+                # The tenant-models endpoint returns different format than the old endpoint
+                tenant_domain = assignment.get("tenant_domain", "")
+                model_id = assignment["model_id"]
+                is_enabled = assignment.get("is_enabled", True)
+                
+                if is_enabled and tenant_domain:
+                    if tenant_domain not in new_tenant_cache:
+                        new_tenant_cache[tenant_domain] = []
+                    new_tenant_cache[tenant_domain].append(model_id)
+                    
+                    # Also add by tenant_id for backward compatibility
+                    tenant_id = str(assignment.get("tenant_id", ""))
+                    if tenant_id and tenant_id not in new_tenant_cache:
+                        new_tenant_cache[tenant_id] = []
+                    if tenant_id:
+                        new_tenant_cache[tenant_id].append(model_id)
+                    
+            except Exception as e:
+                logger.error(f"Failed to parse tenant assignment: {e}")
+        
+        self._tenant_model_cache = new_tenant_cache
+        logger.debug(f"Updated tenant cache: {self._tenant_model_cache}")
+    
+    async def force_sync(self):
+        """Force immediate sync from admin cluster"""
+        self._last_sync = datetime.min
+        await self._ensure_fresh_cache()
+
+
+# Global instance
+_admin_model_service = None
+
+def get_admin_model_service() -> AdminModelConfigService:
+    """Get singleton admin model service"""
+    global _admin_model_service
+    if _admin_model_service is None:
+        _admin_model_service = AdminModelConfigService()
+    return _admin_model_service