GT AI OS Community v2.0.33 - Add NVIDIA NIM and Nemotron agents
- Updated python_coding_microproject.csv to use NVIDIA NIM Kimi K2 - Updated kali_linux_shell_simulator.csv to use NVIDIA NIM Kimi K2 - Made more general-purpose (flexible targets, expanded tools) - Added nemotron-mini-agent.csv for fast local inference via Ollama - Added nemotron-agent.csv for advanced reasoning via Ollama - Added wiki page: Projects for NVIDIA NIMs and Nemotron
This commit is contained in:
404
apps/resource-cluster/app/api/v1/resources_cbrest.py
Normal file
404
apps/resource-cluster/app/api/v1/resources_cbrest.py
Normal file
@@ -0,0 +1,404 @@
|
||||
"""
|
||||
GT 2.0 Resource Cluster - Resource Management API with CB-REST Standards
|
||||
|
||||
This module handles non-AI endpoints using CB-REST standard.
|
||||
AI inference endpoints maintain OpenAI compatibility.
|
||||
"""
|
||||
import asyncio
import logging
import uuid
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional

from fastapi import APIRouter, BackgroundTasks, Depends, Query, Request
from pydantic import BaseModel, Field

from app.core.api_standards import (
    APIError,
    ErrorCode,
    format_error,
    format_response,
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/resources", tags=["Resource Management"])
|
||||
|
||||
|
||||
# Request/Response Models
|
||||
class HealthCheckRequest(BaseModel):
    """Request body for POST /resources/health/check."""

    # Identifier of the resource to probe.
    resource_id: str = Field(..., description="Resource identifier")
    # When True, the handler adds model/inference-level diagnostics to the result.
    deep_check: bool = Field(False, description="Perform deep health check")
|
||||
|
||||
|
||||
class RAGProcessRequest(BaseModel):
    """Request body for POST /resources/rag/process-document."""

    # Raw document text to be chunked and embedded.
    document_content: str = Field(..., description="Document content to process")
    # Chunking strategy name; default "semantic" (valid values not enforced here).
    chunking_strategy: str = Field("semantic", description="Chunking strategy")
    # Target chunk size, bounded 100..10000 (units presumably characters/tokens — confirm with pipeline).
    chunk_size: int = Field(1000, ge=100, le=10000)
    # Overlap between consecutive chunks, bounded 0..500.
    chunk_overlap: int = Field(100, ge=0, le=500)
    # Embedding model identifier used downstream.
    embedding_model: str = Field("text-embedding-3-small")
|
||||
|
||||
|
||||
class SemanticSearchRequest(BaseModel):
    """Request body for POST /resources/rag/semantic-search."""

    # Natural-language search query.
    query: str = Field(..., description="Search query")
    # Vector collection to search within.
    collection_id: str = Field(..., description="Vector collection ID")
    # Maximum number of hits to return, bounded 1..100.
    top_k: int = Field(10, ge=1, le=100)
    # Minimum relevance score for a hit to be included, 0.0..1.0.
    relevance_threshold: float = Field(0.7, ge=0.0, le=1.0)
    # Optional metadata filters applied to the search (schema defined by the vector store).
    filters: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class AgentExecutionRequest(BaseModel):
    """Request body for POST /resources/agents/execute."""

    # Agent type selector (valid values not enforced here).
    agent_type: str = Field(..., description="Agent type")
    # Free-form task configuration passed to the agent.
    task: Dict[str, Any] = Field(..., description="Task configuration")
    # Execution timeout in seconds, bounded 10..3600.
    timeout: int = Field(300, ge=10, le=3600, description="Timeout in seconds")
    # Optional extra context for the execution (schema defined by the agent runtime).
    execution_context: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
@router.get("/health/system")
async def system_health(request: Request):
    """
    Get overall system health status

    CB-REST Capability Required: health:system:read
    """
    # Request ID propagated by middleware, if any.
    request_id = getattr(request.state, 'request_id', None)
    capability = "health:system:read"
    try:
        # Mock per-service statuses (static placeholder values).
        service_statuses = [
            {"service": "ai_inference", "status": "healthy", "latency_ms": 45},
            {"service": "rag_processing", "status": "healthy", "latency_ms": 120},
            {"service": "vector_storage", "status": "healthy", "latency_ms": 30},
            {"service": "agent_orchestration", "status": "healthy", "latency_ms": 85},
        ]
        resource_utilization = {
            "cpu_percent": 42.5,
            "memory_percent": 68.3,
            "gpu_percent": 35.0,
            "disk_percent": 55.2,
        }
        performance_metrics = {
            "requests_per_second": 145,
            "average_latency_ms": 95,
            "error_rate_percent": 0.02,
            "active_connections": 234,
        }
        health_status = {
            "overall_health": "healthy",
            "service_statuses": service_statuses,
            "resource_utilization": resource_utilization,
            "performance_metrics": performance_metrics,
            "timestamp": datetime.utcnow().isoformat(),
        }
        return format_response(
            data=health_status,
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        logger.error(f"Failed to get system health: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
||||
|
||||
|
||||
@router.post("/health/check")
async def check_resource_health(
    request: Request,
    health_req: HealthCheckRequest,
    background_tasks: BackgroundTasks
):
    """
    Perform health check on a specific resource

    CB-REST Capability Required: health:resource:check
    """
    request_id = getattr(request.state, 'request_id', None)
    capability = "health:resource:check"
    try:
        # Mock health check result for the requested resource.
        details = {
            "endpoint_reachable": True,
            "authentication_valid": True,
            "rate_limit_ok": True,
            "response_time_acceptable": True,
        }
        health_result = {
            "resource_id": health_req.resource_id,
            "status": "healthy",
            "latency_ms": 87,
            "last_successful_request": datetime.utcnow().isoformat(),
            "error_count_24h": 3,
            "success_rate_24h": 99.97,
            "details": details,
        }

        # Deep checks add model/runtime-level diagnostics on request.
        if health_req.deep_check:
            deep_results = {
                "model_loaded": True,
                "memory_usage_mb": 2048,
                "inference_test_passed": True,
                "test_latency_ms": 145,
            }
            health_result["deep_check_results"] = deep_results

        return format_response(
            data=health_result,
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        logger.error(f"Failed to check resource health: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
||||
|
||||
|
||||
@router.post("/rag/process-document")
async def process_document(
    request: Request,
    rag_req: RAGProcessRequest,
    background_tasks: BackgroundTasks
):
    """
    Process document for RAG pipeline

    CB-REST Capability Required: rag:document:process
    """
    request_id = getattr(request.state, 'request_id', None)
    capability = "rag:document:process"
    try:
        processing_id = str(uuid.uuid4())

        # Hand the heavy lifting to a background task; respond immediately.
        background_tasks.add_task(
            process_document_async,
            processing_id,
            rag_req,
        )

        # Mock preview of the first few chunks.
        chunk_preview = []
        for i in range(3):
            chunk_preview.append({
                "chunk_id": f"chunk_{i}",
                "text": f"Sample chunk {i} from document...",
                "metadata": {"position": i, "size": rag_req.chunk_size},
            })

        payload = {
            "processing_id": processing_id,
            "status": "processing",
            "chunk_preview": chunk_preview,
            "estimated_completion": (datetime.utcnow() + timedelta(seconds=30)).isoformat(),
        }
        return format_response(
            data=payload,
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        logger.error(f"Failed to process document: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
||||
|
||||
|
||||
@router.post("/rag/semantic-search")
async def semantic_search(
    request: Request,
    search_req: SemanticSearchRequest
):
    """
    Perform semantic search in vector database

    CB-REST Capability Required: rag:search:execute
    """
    request_id = getattr(request.state, 'request_id', None)
    capability = "rag:search:execute"
    try:
        # Mock hits: at most 5, with linearly decaying relevance scores.
        hit_count = min(search_req.top_k, 5)
        results = []
        for i in range(hit_count):
            results.append({
                "document_id": f"doc_{i}",
                "chunk_id": f"chunk_{i}",
                "text": f"Relevant text snippet {i} matching query: {search_req.query[:50]}...",
                "relevance_score": 0.95 - (i * 0.05),
                "metadata": {
                    "source": f"document_{i}.pdf",
                    "page": i + 1,
                    "timestamp": datetime.utcnow().isoformat(),
                },
            })

        payload = {
            "results": results,
            "query_embedding": [0.1] * 10,  # Truncated for brevity
            "search_metadata": {
                "collection_id": search_req.collection_id,
                "documents_searched": 1500,
                "search_time_ms": 145,
                "model_used": "text-embedding-3-small",
            },
        }
        return format_response(
            data=payload,
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        logger.error(f"Failed to perform semantic search: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
||||
|
||||
|
||||
@router.post("/agents/execute")
async def execute_agent(
    request: Request,
    agent_req: AgentExecutionRequest,
    background_tasks: BackgroundTasks
):
    """
    Execute an agentic workflow

    CB-REST Capability Required: agent:*:execute
    """
    request_id = getattr(request.state, 'request_id', None)
    capability = "agent:*:execute"
    try:
        execution_id = str(uuid.uuid4())

        # Queue the agent run asynchronously; the caller polls for status.
        background_tasks.add_task(
            execute_agent_async,
            execution_id,
            agent_req,
        )

        allocation = {
            "cpu_cores": 2,
            "memory_mb": 4096,
            "gpu_allocation": 0.25,
        }
        payload = {
            "execution_id": execution_id,
            "status": "queued",
            # Mock estimate: half the configured timeout.
            "estimated_duration": agent_req.timeout // 2,
            "resource_allocation": allocation,
        }
        return format_response(
            data=payload,
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        logger.error(f"Failed to execute agent: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
||||
|
||||
|
||||
@router.get("/agents/{execution_id}/status")
async def get_agent_status(
    request: Request,
    execution_id: str
):
    """
    Get agent execution status

    CB-REST Capability Required: agent:{execution_id}:status
    """
    request_id = getattr(request.state, 'request_id', None)
    # Capability is scoped to the specific execution being queried.
    capability = f"agent:{execution_id}:status"
    try:
        # Mock status snapshot for the requested execution.
        current_task = {
            "name": "data_analysis",
            "status": "in_progress",
            "started_at": datetime.utcnow().isoformat(),
        }
        memory_usage = {
            "working_memory_mb": 512,
            "context_size": 8192,
            "tool_calls_made": 12,
        }
        performance_metrics = {
            "steps_completed": 8,
            "total_steps": 12,
            "average_step_time_ms": 2500,
            "errors_encountered": 0,
        }
        status = {
            "execution_id": execution_id,
            "status": "running",
            "progress_percent": 65,
            "current_task": current_task,
            "memory_usage": memory_usage,
            "performance_metrics": performance_metrics,
        }
        return format_response(
            data=status,
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        logger.error(f"Failed to get agent status: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
||||
|
||||
|
||||
@router.post("/usage/record")
async def record_usage(
    request: Request,
    operation_type: str,
    resource_id: str,
    usage_metrics: Dict[str, Any]
):
    """
    Record resource usage for billing and analytics

    CB-REST Capability Required: usage:*:write
    """
    request_id = getattr(request.state, 'request_id', None)
    capability = "usage:*:write"
    try:
        # Mock accounting record with static post-update quota values.
        quotas = {
            "tokens_remaining": 950000,
            "requests_remaining": 9500,
            "cost_accumulated_cents": 125,
        }
        usage_record = {
            "record_id": str(uuid.uuid4()),
            "recorded": True,
            "updated_quotas": quotas,
            "warnings": [],
        }

        # Flag unusually large token consumption for a single operation.
        tokens_used = usage_metrics.get("tokens_used", 0)
        if tokens_used > 10000:
            warning = {
                "type": "high_token_usage",
                "message": "High token usage detected",
                "threshold": 10000,
                "actual": tokens_used,
            }
            usage_record["warnings"].append(warning)

        return format_response(
            data=usage_record,
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        logger.error(f"Failed to record usage: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
||||
|
||||
|
||||
# Async helper functions
|
||||
async def process_document_async(processing_id: str, rag_req: RAGProcessRequest):
|
||||
"""Background task for document processing"""
|
||||
# Implement actual document processing logic here
|
||||
await asyncio.sleep(30) # Simulate processing
|
||||
logger.info(f"Document processing completed: {processing_id}")
|
||||
|
||||
|
||||
async def execute_agent_async(execution_id: str, agent_req: AgentExecutionRequest) -> None:
    """Background task for agent execution.

    Args:
        execution_id: Correlation ID returned to the client by /agents/execute.
        agent_req: Original execution request; only ``timeout`` is read by the mock.
    """
    # BUG FIX: the module never imported asyncio, so this task raised
    # NameError at runtime; import locally so the fix is self-contained.
    import asyncio

    # TODO: implement actual agent execution logic.
    await asyncio.sleep(agent_req.timeout // 2)  # Simulate execution
    logger.info(f"Agent execution completed: {execution_id}")
|
||||
Reference in New Issue
Block a user