Files
gt-ai-os-community/apps/resource-cluster/app/api/v1/resources_cbrest.py
HackWeasel 310491a557 GT AI OS Community v2.0.33 - Add NVIDIA NIM and Nemotron agents
- Updated python_coding_microproject.csv to use NVIDIA NIM Kimi K2
- Updated kali_linux_shell_simulator.csv to use NVIDIA NIM Kimi K2
  - Made more general-purpose (flexible targets, expanded tools)
- Added nemotron-mini-agent.csv for fast local inference via Ollama
- Added nemotron-agent.csv for advanced reasoning via Ollama
- Added wiki page: Projects for NVIDIA NIMs and Nemotron
2025-12-12 17:47:14 -05:00

404 lines
13 KiB
Python

"""
GT 2.0 Resource Cluster - Resource Management API with CB-REST Standards
This module handles non-AI endpoints using CB-REST standard.
AI inference endpoints maintain OpenAI compatibility.
"""
import asyncio
import logging
import uuid
from datetime import datetime, timedelta
from typing import List, Optional, Dict, Any

from fastapi import APIRouter, Depends, Query, Request, BackgroundTasks
from pydantic import BaseModel, Field

from app.core.api_standards import (
format_response,
format_error,
ErrorCode,
APIError
)
# Module-level logger; inherits configuration from the application root logger.
logger = logging.getLogger(__name__)
# All routes in this module are mounted under the /resources prefix.
router = APIRouter(prefix="/resources", tags=["Resource Management"])
# Request/Response Models
class HealthCheckRequest(BaseModel):
    """Request body for POST /resources/health/check."""
    # Identifier of the resource to probe.
    resource_id: str = Field(..., description="Resource identifier")
    # When True, the endpoint also returns model-load / inference smoke-test results.
    deep_check: bool = Field(False, description="Perform deep health check")
class RAGProcessRequest(BaseModel):
    """Request body for POST /resources/rag/process-document."""
    # Raw document text to be chunked and embedded.
    document_content: str = Field(..., description="Document content to process")
    # Chunking strategy name; defaults to "semantic".
    chunking_strategy: str = Field("semantic", description="Chunking strategy")
    # Target chunk size, bounded to 100..10000 (units not specified here — presumably characters or tokens; confirm against the pipeline).
    chunk_size: int = Field(1000, ge=100, le=10000)
    # Overlap between consecutive chunks, bounded to 0..500.
    chunk_overlap: int = Field(100, ge=0, le=500)
    # Embedding model identifier used for vectorization.
    embedding_model: str = Field("text-embedding-3-small")
class SemanticSearchRequest(BaseModel):
    """Request body for POST /resources/rag/semantic-search."""
    # Natural-language search query.
    query: str = Field(..., description="Search query")
    # Vector collection to search within.
    collection_id: str = Field(..., description="Vector collection ID")
    # Maximum number of results to return, bounded to 1..100.
    top_k: int = Field(10, ge=1, le=100)
    # Minimum relevance score for a hit to be included (0.0..1.0).
    relevance_threshold: float = Field(0.7, ge=0.0, le=1.0)
    # Optional metadata filters; schema depends on the vector store.
    filters: Optional[Dict[str, Any]] = None
class AgentExecutionRequest(BaseModel):
    """Request body for POST /resources/agents/execute."""
    # Type of agent workflow to run.
    agent_type: str = Field(..., description="Agent type")
    # Free-form task configuration passed to the agent.
    task: Dict[str, Any] = Field(..., description="Task configuration")
    # Execution timeout in seconds, bounded to 10..3600.
    timeout: int = Field(300, ge=10, le=3600, description="Timeout in seconds")
    # Optional extra context forwarded to the agent runtime.
    execution_context: Optional[Dict[str, Any]] = None
@router.get("/health/system")
async def system_health(request: Request):
    """
    Get overall system health status
    CB-REST Capability Required: health:system:read
    """
    try:
        # Static mock telemetry, organized by section before assembly.
        service_statuses = [
            {"service": "ai_inference", "status": "healthy", "latency_ms": 45},
            {"service": "rag_processing", "status": "healthy", "latency_ms": 120},
            {"service": "vector_storage", "status": "healthy", "latency_ms": 30},
            {"service": "agent_orchestration", "status": "healthy", "latency_ms": 85},
        ]
        utilization = {
            "cpu_percent": 42.5,
            "memory_percent": 68.3,
            "gpu_percent": 35.0,
            "disk_percent": 55.2,
        }
        performance = {
            "requests_per_second": 145,
            "average_latency_ms": 95,
            "error_rate_percent": 0.02,
            "active_connections": 234,
        }
        payload = {
            "overall_health": "healthy",
            "service_statuses": service_statuses,
            "resource_utilization": utilization,
            "performance_metrics": performance,
            "timestamp": datetime.utcnow().isoformat(),
        }
        return format_response(
            data=payload,
            capability_used="health:system:read",
            request_id=getattr(request.state, 'request_id', None),
        )
    except Exception as e:
        logger.error(f"Failed to get system health: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used="health:system:read",
            request_id=getattr(request.state, 'request_id', None),
        )
@router.post("/health/check")
async def check_resource_health(
    request: Request,
    health_req: HealthCheckRequest,
    background_tasks: BackgroundTasks
):
    """
    Perform health check on a specific resource
    CB-REST Capability Required: health:resource:check
    """
    try:
        # Mock health check result; real probing is not implemented yet.
        result = {
            "resource_id": health_req.resource_id,
            "status": "healthy",
            "latency_ms": 87,
            "last_successful_request": datetime.utcnow().isoformat(),
            "error_count_24h": 3,
            "success_rate_24h": 99.97,
            "details": {
                "endpoint_reachable": True,
                "authentication_valid": True,
                "rate_limit_ok": True,
                "response_time_acceptable": True,
            },
        }
        # Deep checks add model/inference diagnostics on top of the basic probe.
        if health_req.deep_check:
            result["deep_check_results"] = {
                "model_loaded": True,
                "memory_usage_mb": 2048,
                "inference_test_passed": True,
                "test_latency_ms": 145,
            }
        return format_response(
            data=result,
            capability_used="health:resource:check",
            request_id=getattr(request.state, 'request_id', None),
        )
    except Exception as e:
        logger.error(f"Failed to check resource health: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used="health:resource:check",
            request_id=getattr(request.state, 'request_id', None),
        )
@router.post("/rag/process-document")
async def process_document(
    request: Request,
    rag_req: RAGProcessRequest,
    background_tasks: BackgroundTasks
):
    """
    Process document for RAG pipeline
    CB-REST Capability Required: rag:document:process
    """
    try:
        job_id = str(uuid.uuid4())
        # Kick off the heavy processing in the background; respond immediately.
        background_tasks.add_task(process_document_async, job_id, rag_req)
        # Build a small mock preview of the first few chunks.
        preview = []
        for idx in range(3):
            preview.append({
                "chunk_id": f"chunk_{idx}",
                "text": f"Sample chunk {idx} from document...",
                "metadata": {"position": idx, "size": rag_req.chunk_size},
            })
        return format_response(
            data={
                "processing_id": job_id,
                "status": "processing",
                "chunk_preview": preview,
                "estimated_completion": (datetime.utcnow() + timedelta(seconds=30)).isoformat(),
            },
            capability_used="rag:document:process",
            request_id=getattr(request.state, 'request_id', None),
        )
    except Exception as e:
        logger.error(f"Failed to process document: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used="rag:document:process",
            request_id=getattr(request.state, 'request_id', None),
        )
@router.post("/rag/semantic-search")
async def semantic_search(
    request: Request,
    search_req: SemanticSearchRequest
):
    """
    Perform semantic search in vector database
    CB-REST Capability Required: rag:search:execute
    """
    try:
        # Mock search results; never fabricate more than 5 hits.
        hits = []
        for idx in range(min(search_req.top_k, 5)):
            hits.append({
                "document_id": f"doc_{idx}",
                "chunk_id": f"chunk_{idx}",
                "text": f"Relevant text snippet {idx} matching query: {search_req.query[:50]}...",
                "relevance_score": 0.95 - (idx * 0.05),
                "metadata": {
                    "source": f"document_{idx}.pdf",
                    "page": idx + 1,
                    "timestamp": datetime.utcnow().isoformat(),
                },
            })
        return format_response(
            data={
                "results": hits,
                "query_embedding": [0.1] * 10,  # Truncated for brevity
                "search_metadata": {
                    "collection_id": search_req.collection_id,
                    "documents_searched": 1500,
                    "search_time_ms": 145,
                    "model_used": "text-embedding-3-small",
                },
            },
            capability_used="rag:search:execute",
            request_id=getattr(request.state, 'request_id', None),
        )
    except Exception as e:
        logger.error(f"Failed to perform semantic search: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used="rag:search:execute",
            request_id=getattr(request.state, 'request_id', None),
        )
@router.post("/agents/execute")
async def execute_agent(
    request: Request,
    agent_req: AgentExecutionRequest,
    background_tasks: BackgroundTasks
):
    """
    Execute an agentic workflow
    CB-REST Capability Required: agent:*:execute
    """
    try:
        exec_id = str(uuid.uuid4())
        # Queue the long-running agent work off the request path.
        background_tasks.add_task(execute_agent_async, exec_id, agent_req)
        payload = {
            "execution_id": exec_id,
            "status": "queued",
            # Rough estimate: half of the caller-supplied timeout.
            "estimated_duration": agent_req.timeout // 2,
            "resource_allocation": {
                "cpu_cores": 2,
                "memory_mb": 4096,
                "gpu_allocation": 0.25,
            },
        }
        return format_response(
            data=payload,
            capability_used="agent:*:execute",
            request_id=getattr(request.state, 'request_id', None),
        )
    except Exception as e:
        logger.error(f"Failed to execute agent: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used="agent:*:execute",
            request_id=getattr(request.state, 'request_id', None),
        )
@router.get("/agents/{execution_id}/status")
async def get_agent_status(
    request: Request,
    execution_id: str
):
    """
    Get agent execution status
    CB-REST Capability Required: agent:{execution_id}:status
    """
    # The capability string is parameterized by the execution being queried.
    capability = f"agent:{execution_id}:status"
    try:
        # Mock status
        current_task = {
            "name": "data_analysis",
            "status": "in_progress",
            "started_at": datetime.utcnow().isoformat(),
        }
        memory = {
            "working_memory_mb": 512,
            "context_size": 8192,
            "tool_calls_made": 12,
        }
        metrics = {
            "steps_completed": 8,
            "total_steps": 12,
            "average_step_time_ms": 2500,
            "errors_encountered": 0,
        }
        return format_response(
            data={
                "execution_id": execution_id,
                "status": "running",
                "progress_percent": 65,
                "current_task": current_task,
                "memory_usage": memory,
                "performance_metrics": metrics,
            },
            capability_used=capability,
            request_id=getattr(request.state, 'request_id', None),
        )
    except Exception as e:
        logger.error(f"Failed to get agent status: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=getattr(request.state, 'request_id', None),
        )
@router.post("/usage/record")
async def record_usage(
    request: Request,
    operation_type: str,
    resource_id: str,
    usage_metrics: Dict[str, Any]
):
    """
    Record resource usage for billing and analytics
    CB-REST Capability Required: usage:*:write
    """
    try:
        tokens_used = usage_metrics.get("tokens_used", 0)
        record = {
            "record_id": str(uuid.uuid4()),
            "recorded": True,
            "updated_quotas": {
                "tokens_remaining": 950000,
                "requests_remaining": 9500,
                "cost_accumulated_cents": 125,
            },
            "warnings": [],
        }
        # Check for quota warnings
        if tokens_used > 10000:
            record["warnings"].append({
                "type": "high_token_usage",
                "message": "High token usage detected",
                "threshold": 10000,
                "actual": tokens_used,
            })
        return format_response(
            data=record,
            capability_used="usage:*:write",
            request_id=getattr(request.state, 'request_id', None),
        )
    except Exception as e:
        logger.error(f"Failed to record usage: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used="usage:*:write",
            request_id=getattr(request.state, 'request_id', None),
        )
# Async helper functions
async def process_document_async(processing_id: str, rag_req: RAGProcessRequest) -> None:
    """Background task for document processing.

    Args:
        processing_id: Correlation ID returned to the client by
            /rag/process-document, used here only for logging.
        rag_req: Validated processing request (content and chunking config) —
            currently unused by this placeholder implementation.
    """
    # Fix: asyncio was referenced without being imported anywhere in this
    # module, so this task raised NameError as soon as it ran.
    import asyncio

    # Implement actual document processing logic here
    await asyncio.sleep(30)  # Simulate processing
    logger.info(f"Document processing completed: {processing_id}")
async def execute_agent_async(execution_id: str, agent_req: AgentExecutionRequest) -> None:
    """Background task for agent execution.

    Args:
        execution_id: Correlation ID returned to the client by
            /agents/execute, used here only for logging.
        agent_req: Validated execution request; only `timeout` is read, to
            size the simulated run (half the allowed timeout).
    """
    # Fix: asyncio was referenced without being imported anywhere in this
    # module, so this task raised NameError as soon as it ran.
    import asyncio

    # Implement actual agent execution logic here
    await asyncio.sleep(agent_req.timeout // 2)  # Simulate execution
    logger.info(f"Agent execution completed: {execution_id}")