GT AI OS Community Edition v2.0.33

Security hardening release addressing CodeQL and Dependabot alerts: - Fix stack trace exposure in error responses - Add SSRF protection with DNS resolution checking - Implement proper URL hostname validation (replaces substring matching) - Add centralized path sanitization to prevent path traversal - Fix ReDoS vulnerability in email validation regex - Improve HTML sanitization in validation utilities - Fix capability wildcard matching in auth utilities - Update glob dependency to address CVE - Add CodeQL suppression comments for verified false positives 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 17:04:45 -05:00
commit b9dfb86260
746 changed files with 232071 additions and 0 deletions
--- a/apps/tenant-backend/app/api/v1/models.py
+++ b/apps/tenant-backend/app/api/v1/models.py
@@ -0,0 +1,172 @@
+"""
+Tenant Models API - Interface to Resource Cluster Model Management
+
+Provides tenant-scoped access to available AI models from the Resource Cluster.
+"""
+
+from typing import Dict, Any, List, Optional
+from fastapi import APIRouter, HTTPException, status, Depends
+import httpx
+import logging
+
+from app.core.security import get_current_user
+from app.core.config import get_settings
+from app.core.cache import get_cache
+from app.services.resource_cluster_client import ResourceClusterClient
+
+# Module-level logger named after this module's import path.
+logger = logging.getLogger(__name__)
+# Settings and cache handles are obtained once, when this module is imported,
+# and are shared by every request handler defined below.
+settings = get_settings()
+cache = get_cache()
+
+# All routes declared in this module are served under /api/v1/models.
+router = APIRouter(prefix="/api/v1/models", tags=["Models"])
+
+
+def _to_model_option(model: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+    """Convert one Resource Cluster model record into a tenant-facing entry.
+
+    Args:
+        model: A single element of the ``models`` list returned by the
+            Resource Cluster.
+
+    Returns:
+        The flattened entry, or ``None`` when the record is filtered out
+        (deployment is not "available", health is neither "healthy" nor
+        "unknown", or the model type is "embedding").
+
+    Raises:
+        KeyError: If a required field is missing from the record.
+        TypeError: If the record or one of its nested sections is not a mapping.
+    """
+    model_status = model["status"]
+    if model_status["deployment"] != "available":
+        return None
+    if model_status["health"] not in ("healthy", "unknown"):
+        return None
+    if model["model_type"] == "embedding":
+        return None
+
+    performance = model["performance"]
+    return {
+        "value": model["id"],  # model_id string for backwards compatibility
+        "uuid": model.get("uuid"),  # Database UUID for unique identification
+        "label": model["name"],
+        "description": model["description"],
+        "provider": model["provider"],
+        "model_type": model["model_type"],
+        "max_tokens": performance["max_tokens"],
+        "context_window": performance["context_window"],
+        "cost_per_1k_tokens": performance["cost_per_1k_tokens"],
+        "latency_p50_ms": performance["latency_p50_ms"],
+        "health_status": model_status["health"],
+        "deployment_status": model_status["deployment"],
+    }
+
+
+@router.get("/", summary="List available models for tenant")
+async def list_available_models(
+    current_user: Dict = Depends(get_current_user)
+) -> Dict[str, Any]:
+    """Get list of AI models available to the current tenant.
+
+    Serves a cached listing when one exists; otherwise asks the Resource
+    Cluster, keeps only non-embedding models that are deployed and not known
+    to be unhealthy, sorts them (NVIDIA, then Groq, then other providers;
+    lower p50 latency first) and caches the result.
+
+    Args:
+        current_user: Authenticated user; ``tenant_domain`` is read from it
+            (falls back to ``"default"``).
+
+    Returns:
+        On success, a dict with ``models``, ``total``, ``tenant_domain``,
+        ``last_updated`` and ``cached``. If the Resource Cluster answers with
+        a non-200 status or any error occurs, a dict with an empty ``models``
+        list, ``total`` 0, ``tenant_domain`` and an explanatory ``message``;
+        this endpoint does not raise for upstream failures.
+    """
+    import os
+
+    try:
+        # Get tenant domain from current user
+        tenant_domain = current_user.get("tenant_domain", "default")
+
+        # Check cache first (5-minute TTL)
+        cache_key = f"models_list_{tenant_domain}"
+        cached_models = cache.get(cache_key, ttl=300)
+        if cached_models:
+            logger.debug(f"Returning cached model list for tenant {tenant_domain}")
+            return {**cached_models, "cached": True}
+
+        # Call Resource Cluster models API - use Docker service name if in container
+        if os.path.exists('/.dockerenv'):
+            resource_cluster_url = "http://resource-cluster:8000"
+        else:
+            resource_cluster_url = settings.resource_cluster_url
+
+        async with httpx.AsyncClient() as client:
+            response = await client.get(
+                f"{resource_cluster_url}/api/v1/models/",
+                headers={
+                    "X-Tenant-Domain": tenant_domain
+                },
+                timeout=30.0
+            )
+
+        if response.status_code != 200:
+            # Resource Cluster unavailable - return empty list
+            logger.warning(f"Resource Cluster unavailable (HTTP {response.status_code})")
+            return {
+                "models": [],
+                "total": 0,
+                "tenant_domain": tenant_domain,
+                "message": "No models available - resource cluster unavailable"
+            }
+
+        models_data = response.json()
+
+        # Filter models by health and deployment status. Each record is
+        # converted on its own so that a single malformed record is skipped
+        # instead of turning the whole listing into a "service error".
+        available_models = []
+        for model in models_data.get("models", []):
+            try:
+                option = _to_model_option(model)
+            except (KeyError, TypeError) as exc:
+                logger.warning(f"Skipping malformed model record: {exc!r}")
+                continue
+            if option is not None:
+                available_models.append(option)
+
+        # Sort by provider preference (NVIDIA first, then Groq) and then by performance
+        provider_order = {"nvidia": 0, "groq": 1}
+        available_models.sort(key=lambda x: (
+            provider_order.get(x["provider"], 99),  # NVIDIA first, then Groq
+            x["latency_p50_ms"] or 999  # Lower latency first; missing latency sorts last
+        ))
+
+        result = {
+            "models": available_models,
+            "total": len(available_models),
+            "tenant_domain": tenant_domain,
+            "last_updated": models_data.get("last_updated"),
+            "cached": False
+        }
+
+        # Cache the result for later requests from the same tenant
+        cache.set(cache_key, result)
+        logger.debug(f"Cached model list for tenant {tenant_domain}")
+
+        return result
+
+    except Exception as e:
+        # Deliberate best-effort: any failure degrades to an empty listing
+        # rather than an error response.
+        logger.error(f"Error fetching models from Resource Cluster: {e}")
+        return {
+            "models": [],
+            "total": 0,
+            "tenant_domain": current_user.get("tenant_domain", "default"),
+            "message": "No models available - service error"
+        }
+
+
+
+@router.get("/{model_id}", summary="Get model details")
+async def get_model_details(
+    model_id: str,
+    current_user: Dict = Depends(get_current_user)
+) -> Dict[str, Any]:
+    """Get detailed information about a specific model.
+
+    Args:
+        model_id: Model identifier taken from the request path.
+        current_user: Authenticated user; ``tenant_domain`` is read from it
+            (falls back to ``"default"``) and forwarded as ``X-Tenant-Domain``.
+
+    Returns:
+        The JSON body of the Resource Cluster's 200 response, unmodified.
+
+    Raises:
+        HTTPException: 404 when the Resource Cluster answers 404; 503 for any
+            other upstream status or for any error raised while calling it.
+    """
+    import os
+    from urllib.parse import quote
+
+    try:
+        tenant_domain = current_user.get("tenant_domain", "default")
+
+        # Call Resource Cluster for model details - use Docker service name if in container
+        if os.path.exists('/.dockerenv'):
+            resource_cluster_url = "http://resource-cluster:8000"
+        else:
+            resource_cluster_url = settings.resource_cluster_url
+
+        # model_id is caller-controlled: percent-encode it so characters such
+        # as "?", "#" or "%" cannot alter the path or query of the upstream
+        # request.
+        encoded_model_id = quote(model_id, safe="")
+
+        async with httpx.AsyncClient() as client:
+            response = await client.get(
+                f"{resource_cluster_url}/api/v1/models/{encoded_model_id}",
+                headers={
+                    "X-Tenant-Domain": tenant_domain
+                },
+                timeout=15.0
+            )
+
+        if response.status_code == 200:
+            return response.json()
+        if response.status_code == 404:
+            raise HTTPException(
+                status_code=status.HTTP_404_NOT_FOUND,
+                detail=f"Model {model_id} not found"
+            )
+        raise HTTPException(
+            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+            detail="Resource Cluster unavailable"
+        )
+
+    except HTTPException:
+        # Deliberate HTTP errors raised above pass through unchanged.
+        raise
+    except Exception as e:
+        # Anything else (connection failure, timeout, invalid JSON, ...) is
+        # reported to the client as a generic 503 without internal details.
+        logger.error(f"Error fetching model {model_id} details: {e}")
+        raise HTTPException(
+            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+            detail="Failed to get model details"
+        )