GT AI OS Community Edition v2.0.33

Security hardening release addressing CodeQL and Dependabot alerts:

- Fix stack trace exposure in error responses
- Add SSRF protection with DNS resolution checking
- Implement exact URL hostname validation, replacing substring matching (see the sketch after this list)
- Add centralized path sanitization to prevent path traversal
- Fix ReDoS vulnerability in email validation regex
- Improve HTML sanitization in validation utilities
- Fix capability wildcard matching in auth utilities
- Update glob dependency to address CVE
- Add CodeQL suppression comments for verified false positives
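
The hostname-validation and SSRF items work together. Below is a minimal sketch of the combined pattern; the allowlist, function name, and checks are illustrative, not the code shipped in this release, and it does not cover DNS-rebinding races:

```python
import ipaddress
import socket
from urllib.parse import urlparse

# Hypothetical allowlist; a real service would load its own.
ALLOWED_HOSTS = {"api.example.com"}

def is_url_allowed(url: str) -> bool:
    """Exact hostname match plus DNS resolution check against internal ranges."""
    hostname = urlparse(url).hostname
    # Exact-match the parsed hostname: a substring check would accept
    # lookalikes such as "api.example.com.evil.net".
    if hostname is None or hostname not in ALLOWED_HOSTS:
        return False
    try:
        # Resolve the hostname so DNS records pointing at internal
        # services (the classic SSRF trick) are caught before connecting.
        infos = socket.getaddrinfo(hostname, None)
    except socket.gaierror:
        return False
    for _family, _type, _proto, _canon, sockaddr in infos:
        ip = ipaddress.ip_address(sockaddr[0])
        if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
            return False
    return True
```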

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Committed by HackWeasel on 2025-12-12 17:04:45 -05:00 (commit b9dfb86260)
746 changed files with 232,071 additions and 0 deletions


"""
Resource Cluster Client for GT 2.0 Tenant Backend
Provides stateless access to Resource Cluster services including:
- Document processing
- Embedding generation
- Vector storage (ChromaDB)
- Model inference
Perfect tenant isolation with capability-based authentication.
"""
import logging
import base64
import gc
import aiohttp
from typing import Dict, Any, List, Optional
from datetime import datetime
from app.core.config import get_settings
from app.core.capability_client import CapabilityClient
logger = logging.getLogger(__name__)
class ResourceClusterClient:
"""
Client for accessing Resource Cluster services with capability-based auth.
GT 2.0 Security Principles:
- Capability tokens for fine-grained access control
- Stateless operations (no data persistence in Resource Cluster)
- Perfect tenant isolation
- Immediate memory cleanup
"""
def __init__(self):
self.settings = get_settings()
self.capability_client = CapabilityClient()
# Resource Cluster endpoints
# IMPORTANT: Use Docker service name for stability across container restarts
# Fixed 2025-09-12: Changed from hardcoded IP to service name for reliability
self.base_url = getattr(
self.settings,
'resource_cluster_url', # Matches Pydantic field name (case insensitive)
'http://gentwo-resource-backend:8000' # Fallback uses service name, not IP
)
self.endpoints = {
'document_processor': f"{self.base_url}/api/v1/process/document",
'embedding_generator': f"{self.base_url}/api/v1/embeddings/generate",
'chromadb_backend': f"{self.base_url}/api/v1/vectors",
'inference': f"{self.base_url}/api/v1/ai/chat/completions" # Updated to match actual endpoint
}
# Request timeouts
self.request_timeout = 300 # seconds - 5 minutes for complex agent operations
self.upload_timeout = 300 # seconds for large documents
logger.info("Resource Cluster client initialized")
async def _get_capability_token(
self,
tenant_id: str,
user_id: str,
resources: List[str]
) -> str:
"""Generate capability token for Resource Cluster access"""
try:
token = await self.capability_client.generate_capability_token(
user_email=user_id, # Using user_id as email for now
tenant_id=tenant_id,
resources=resources,
expires_hours=1
)
return token
except Exception as e:
logger.error(f"Failed to generate capability token: {e}")
raise
async def _make_request(
self,
method: str,
endpoint: str,
data: Dict[str, Any],
tenant_id: str,
user_id: str,
resources: List[str],
        timeout: Optional[int] = None
) -> Dict[str, Any]:
"""Make authenticated request to Resource Cluster"""
try:
# Get capability token
token = await self._get_capability_token(tenant_id, user_id, resources)
# Prepare headers
headers = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {token}',
'X-Tenant-ID': tenant_id,
'X-User-ID': user_id,
'X-Request-ID': f"{tenant_id}_{user_id}_{datetime.utcnow().timestamp()}"
}
# Make request
timeout_config = aiohttp.ClientTimeout(total=timeout or self.request_timeout)
async with aiohttp.ClientSession(timeout=timeout_config) as session:
async with session.request(
method=method.upper(),
url=endpoint,
json=data,
headers=headers
) as response:
if response.status not in [200, 201]:
error_text = await response.text()
raise RuntimeError(
f"Resource Cluster error: {response.status} - {error_text}"
)
result = await response.json()
return result
except Exception as e:
logger.error(f"Resource Cluster request failed: {e}")
raise
# Document Processing
async def process_document(
self,
content: bytes,
document_type: str,
strategy_type: str = "hybrid",
        tenant_id: Optional[str] = None,
        user_id: Optional[str] = None
) -> List[Dict[str, Any]]:
"""Process document into chunks via Resource Cluster"""
try:
            # Encode bytes as base64 for JSON transport (base64 is imported at module level)
            content_b64 = base64.b64encode(content).decode('utf-8')
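            # Note: base64 inflates the payload by roughly 33%; that overhead is
            # the cost of sending binary content through a JSON body.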
request_data = {
"content": content_b64,
"document_type": document_type,
"strategy": {
"strategy_type": strategy_type,
"chunk_size": 512,
"chunk_overlap": 128
}
}
# Clear original content from memory
del content
gc.collect()
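            # Note: del only removes this function's reference; the bytes are
            # actually freed once the caller's references are released too.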
result = await self._make_request(
method='POST',
endpoint=self.endpoints['document_processor'],
data=request_data,
tenant_id=tenant_id,
user_id=user_id,
resources=['document_processing'],
timeout=self.upload_timeout
)
chunks = result.get('chunks', [])
logger.info(f"Processed document into {len(chunks)} chunks")
return chunks
except Exception as e:
logger.error(f"Document processing failed: {e}")
gc.collect()
raise
# Embedding Generation
async def generate_document_embeddings(
self,
documents: List[str],
tenant_id: str,
user_id: str
) -> List[List[float]]:
"""Generate embeddings for documents"""
try:
request_data = {
"texts": documents,
"model": "BAAI/bge-m3",
"instruction": None # Document embeddings don't need instruction
}
result = await self._make_request(
method='POST',
endpoint=self.endpoints['embedding_generator'],
data=request_data,
tenant_id=tenant_id,
user_id=user_id,
resources=['embedding_generation']
)
embeddings = result.get('embeddings', [])
# Clear documents from memory
del documents
gc.collect()
logger.info(f"Generated {len(embeddings)} document embeddings")
return embeddings
except Exception as e:
logger.error(f"Document embedding generation failed: {e}")
gc.collect()
raise
async def generate_query_embeddings(
self,
queries: List[str],
tenant_id: str,
user_id: str
) -> List[List[float]]:
"""Generate embeddings for queries"""
try:
request_data = {
"texts": queries,
"model": "BAAI/bge-m3",
"instruction": "Represent this sentence for searching relevant passages: "
}
result = await self._make_request(
method='POST',
endpoint=self.endpoints['embedding_generator'],
data=request_data,
tenant_id=tenant_id,
user_id=user_id,
resources=['embedding_generation']
)
embeddings = result.get('embeddings', [])
# Clear queries from memory
del queries
gc.collect()
logger.info(f"Generated {len(embeddings)} query embeddings")
return embeddings
except Exception as e:
logger.error(f"Query embedding generation failed: {e}")
gc.collect()
raise
# Vector Storage (ChromaDB)
async def create_vector_collection(
self,
tenant_id: str,
user_id: str,
dataset_name: str,
metadata: Optional[Dict[str, Any]] = None
) -> bool:
"""Create vector collection in ChromaDB"""
try:
request_data = {
"tenant_id": tenant_id,
"user_id": user_id,
"dataset_name": dataset_name,
"metadata": metadata or {}
}
result = await self._make_request(
method='POST',
endpoint=f"{self.endpoints['chromadb_backend']}/collections",
data=request_data,
tenant_id=tenant_id,
user_id=user_id,
resources=['vector_storage']
)
success = result.get('success', False)
logger.info(f"Created vector collection for {dataset_name}: {success}")
return success
except Exception as e:
logger.error(f"Vector collection creation failed: {e}")
raise
async def store_vectors(
self,
tenant_id: str,
user_id: str,
dataset_name: str,
documents: List[str],
embeddings: List[List[float]],
        metadata: Optional[List[Dict[str, Any]]] = None,
        ids: Optional[List[str]] = None
) -> bool:
"""Store vectors in ChromaDB"""
try:
request_data = {
"tenant_id": tenant_id,
"user_id": user_id,
"dataset_name": dataset_name,
"documents": documents,
"embeddings": embeddings,
"metadata": metadata or [],
"ids": ids
}
result = await self._make_request(
method='POST',
endpoint=f"{self.endpoints['chromadb_backend']}/store",
data=request_data,
tenant_id=tenant_id,
user_id=user_id,
resources=['vector_storage']
)
# Clear vectors from memory immediately
del documents, embeddings
gc.collect()
success = result.get('success', False)
logger.info(f"Stored vectors in {dataset_name}: {success}")
return success
except Exception as e:
logger.error(f"Vector storage failed: {e}")
gc.collect()
raise
async def search_vectors(
self,
tenant_id: str,
user_id: str,
dataset_name: str,
query_embedding: List[float],
top_k: int = 5,
filter_metadata: Optional[Dict[str, Any]] = None
) -> List[Dict[str, Any]]:
"""Search vectors in ChromaDB"""
try:
request_data = {
"tenant_id": tenant_id,
"user_id": user_id,
"dataset_name": dataset_name,
"query_embedding": query_embedding,
"top_k": top_k,
"filter_metadata": filter_metadata or {}
}
result = await self._make_request(
method='POST',
endpoint=f"{self.endpoints['chromadb_backend']}/search",
data=request_data,
tenant_id=tenant_id,
user_id=user_id,
resources=['vector_storage']
)
# Clear query embedding from memory
del query_embedding
gc.collect()
results = result.get('results', [])
logger.info(f"Found {len(results)} vector search results")
return results
except Exception as e:
logger.error(f"Vector search failed: {e}")
gc.collect()
raise
async def delete_vector_collection(
self,
tenant_id: str,
user_id: str,
dataset_name: str
) -> bool:
"""Delete vector collection from ChromaDB"""
try:
request_data = {
"tenant_id": tenant_id,
"user_id": user_id,
"dataset_name": dataset_name
}
result = await self._make_request(
method='DELETE',
endpoint=f"{self.endpoints['chromadb_backend']}/collections",
data=request_data,
tenant_id=tenant_id,
user_id=user_id,
resources=['vector_storage']
)
success = result.get('success', False)
logger.info(f"Deleted vector collection {dataset_name}: {success}")
return success
except Exception as e:
logger.error(f"Vector collection deletion failed: {e}")
raise
# Model Inference
async def inference_with_context(
self,
messages: List[Dict[str, str]],
context: str,
model: str = "llama-3.1-70b-versatile",
        tenant_id: Optional[str] = None,
        user_id: Optional[str] = None
) -> Dict[str, Any]:
"""Perform inference with RAG context"""
try:
# Inject context into system message
enhanced_messages = []
system_context = f"Use the following context to answer the user's question:\n\n{context}\n\n"
for msg in messages:
if msg.get("role") == "system":
enhanced_msg = msg.copy()
enhanced_msg["content"] = system_context + enhanced_msg["content"]
enhanced_messages.append(enhanced_msg)
else:
enhanced_messages.append(msg)
# Add system message if none exists
if not any(msg.get("role") == "system" for msg in enhanced_messages):
enhanced_messages.insert(0, {
"role": "system",
"content": system_context + "You are a helpful AI agent."
})
request_data = {
"messages": enhanced_messages,
"model": model,
"temperature": 0.7,
"max_tokens": 4000,
"user_id": user_id,
"tenant_id": tenant_id
}
result = await self._make_request(
method='POST',
endpoint=self.endpoints['inference'],
data=request_data,
tenant_id=tenant_id,
user_id=user_id,
resources=['llm_inference']
)
# Clear context from memory
del context, enhanced_messages
gc.collect()
return result
except Exception as e:
logger.error(f"Inference with context failed: {e}")
gc.collect()
raise
async def check_health(self) -> Dict[str, Any]:
"""Check Resource Cluster health"""
try:
            # Test basic connectivity, bounded so health checks cannot hang
            timeout_config = aiohttp.ClientTimeout(total=10)
            async with aiohttp.ClientSession(timeout=timeout_config) as session:
async with session.get(f"{self.base_url}/health") as response:
if response.status == 200:
health_data = await response.json()
return {
"status": "healthy",
"resource_cluster": health_data,
"endpoints": list(self.endpoints.keys()),
"base_url": self.base_url
}
else:
return {
"status": "unhealthy",
"error": f"Health check failed: {response.status}",
"base_url": self.base_url
}
except Exception as e:
return {
"status": "unhealthy",
"error": str(e),
"base_url": self.base_url
}
async def call_inference_endpoint(
self,
tenant_id: str,
user_id: str,
endpoint: str = "chat/completions",
        data: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
"""Call AI inference endpoint on Resource Cluster"""
try:
# Use the direct inference endpoint
inference_url = self.endpoints['inference']
            # Copy the caller-supplied payload; tenant/user context is attached
            # via headers inside _make_request
            request_data = data.copy() if data else {}
# Make request with capability token
result = await self._make_request(
method='POST',
endpoint=inference_url,
data=request_data,
tenant_id=tenant_id,
user_id=user_id,
resources=['llm'] # Use valid ResourceType from resource cluster
)
return result
except Exception as e:
logger.error(f"Inference endpoint call failed: {e}")
raise
# Streaming removed for reliability - using non-streaming only
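# A hypothetical usage sketch (not part of this module) showing the intended
# call pattern from an async request handler in the tenant backend; field
# names on the returned chunks are assumptions:
#
#     client = ResourceClusterClient()
#     chunks = await client.process_document(
#         content=file_bytes,
#         document_type="pdf",
#         tenant_id=tenant_id,
#         user_id=user_id,
#     )
#     texts = [c["text"] for c in chunks]  # assumes chunks carry a "text" field
#     embeddings = await client.generate_document_embeddings(texts, tenant_id, user_id)
#     await client.store_vectors(tenant_id, user_id, dataset_name, texts, embeddings)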