- Updated python_coding_microproject.csv to use NVIDIA NIM Kimi K2 - Updated kali_linux_shell_simulator.csv to use NVIDIA NIM Kimi K2 - Made more general-purpose (flexible targets, expanded tools) - Added nemotron-mini-agent.csv for fast local inference via Ollama - Added nemotron-agent.csv for advanced reasoning via Ollama - Added wiki page: Projects for NVIDIA NIMs and Nemotron
404 lines
13 KiB
Python
404 lines
13 KiB
Python
"""
GT 2.0 Resource Cluster - Resource Management API with CB-REST Standards

This module handles non-AI endpoints using the CB-REST standard.
AI inference endpoints maintain OpenAI compatibility.
"""
|
|
from typing import List, Optional, Dict, Any
|
|
from fastapi import APIRouter, Depends, Query, Request, BackgroundTasks
|
|
from pydantic import BaseModel, Field
|
|
import logging
|
|
import uuid
|
|
from datetime import datetime, timedelta
|
|
|
|
from app.core.api_standards import (
|
|
format_response,
|
|
format_error,
|
|
ErrorCode,
|
|
APIError
|
|
)
|
|
|
|
# Module-scoped logger, named after this module's import path.
logger = logging.getLogger(__name__)

# All routes declared in this module are registered on this router, which
# carries the "/resources" prefix and the "Resource Management" tag.
router = APIRouter(
    prefix="/resources",
    tags=["Resource Management"],
)
|
|
|
|
|
|
# Request/Response Models
|
|
class HealthCheckRequest(BaseModel):
    # Request body consumed by the POST /health/check handler.

    # Required: identifier of the resource to check.
    resource_id: str = Field(default=..., description="Resource identifier")

    # Optional, off by default: when set, the handler adds a
    # "deep_check_results" section to its reply.
    deep_check: bool = Field(default=False, description="Perform deep health check")
|
|
|
|
|
|
class RAGProcessRequest(BaseModel):
    # Request body consumed by the POST /rag/process-document handler.

    # Required: the document text to process.
    document_content: str = Field(default=..., description="Document content to process")

    # Free-form strategy name, "semantic" unless overridden; no set of
    # allowed values is enforced by this model.
    chunking_strategy: str = Field(default="semantic", description="Chunking strategy")

    # Validated to lie within 100..10000 inclusive.
    chunk_size: int = Field(default=1000, ge=100, le=10000)

    # Validated to lie within 0..500 inclusive.
    # NOTE(review): the bounds allow chunk_overlap (up to 500) to exceed
    # chunk_size (as low as 100) - confirm whether a cross-field check is wanted.
    chunk_overlap: int = Field(default=100, ge=0, le=500)

    # Name of the embedding model; unconstrained string.
    embedding_model: str = Field(default="text-embedding-3-small")
|
|
|
|
|
|
class SemanticSearchRequest(BaseModel):
    # Request body consumed by the POST /rag/semantic-search handler.

    # Required: the search text.
    query: str = Field(default=..., description="Search query")

    # Required: which vector collection to search.
    collection_id: str = Field(default=..., description="Vector collection ID")

    # Number of results requested, validated to 1..100 inclusive.
    top_k: int = Field(default=10, ge=1, le=100)

    # Validated to 0.0..1.0 inclusive.
    relevance_threshold: float = Field(default=0.7, ge=0.0, le=1.0)

    # Optional free-form filter mapping; absent by default.
    filters: Optional[Dict[str, Any]] = None
|
|
|
|
|
|
class AgentExecutionRequest(BaseModel):
    # Request body consumed by the POST /agents/execute handler.

    # Required: which kind of agent to run.
    agent_type: str = Field(default=..., description="Agent type")

    # Required: free-form task configuration mapping.
    task: Dict[str, Any] = Field(default=..., description="Task configuration")

    # Validated to 10..3600 inclusive; the handler and the background task
    # both derive their duration from half of this value.
    timeout: int = Field(default=300, ge=10, le=3600, description="Timeout in seconds")

    # Optional free-form context mapping; absent by default.
    execution_context: Optional[Dict[str, Any]] = None
|
|
|
|
|
|
@router.get("/health/system")
async def system_health(request: Request):
    """
    Get overall system health status

    CB-REST Capability Required: health:system:read

    The payload is currently a hard-coded snapshot (service statuses,
    resource utilization, performance metrics) stamped with the current
    UTC time. It is passed to ``format_response``; if anything raises, the
    exception is logged with its traceback and ``format_error`` is returned
    with ``ErrorCode.SYSTEM_ERROR`` and a generic message.
    """
    capability = "health:system:read"
    # Looked up once for both the success and the error reply; falls back to
    # None when request.state carries no request_id.
    request_id = getattr(request.state, 'request_id', None)

    try:
        health_status = {
            "overall_health": "healthy",
            "service_statuses": [
                {"service": "ai_inference", "status": "healthy", "latency_ms": 45},
                {"service": "rag_processing", "status": "healthy", "latency_ms": 120},
                {"service": "vector_storage", "status": "healthy", "latency_ms": 30},
                {"service": "agent_orchestration", "status": "healthy", "latency_ms": 85},
            ],
            "resource_utilization": {
                "cpu_percent": 42.5,
                "memory_percent": 68.3,
                "gpu_percent": 35.0,
                "disk_percent": 55.2,
            },
            "performance_metrics": {
                "requests_per_second": 145,
                "average_latency_ms": 95,
                "error_rate_percent": 0.02,
                "active_connections": 234,
            },
            "timestamp": datetime.utcnow().isoformat(),
        }

        return format_response(
            data=health_status,
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        # logger.exception keeps the traceback; the client only ever sees the
        # generic message below, so the log is the sole place to diagnose it.
        logger.exception("Failed to get system health: %s", e)
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
|
|
|
|
|
@router.post("/health/check")
async def check_resource_health(
    request: Request,
    health_req: HealthCheckRequest,
    background_tasks: BackgroundTasks
):
    """
    Perform health check on a specific resource

    CB-REST Capability Required: health:resource:check

    The result is currently mocked: fixed figures are echoed back together
    with ``health_req.resource_id``, and a ``deep_check_results`` section is
    added when ``health_req.deep_check`` is true. ``background_tasks`` is
    accepted but not used by this implementation.

    On any exception the error is logged with its traceback and a
    ``format_error`` reply with ``ErrorCode.SYSTEM_ERROR`` is returned.
    """
    capability = "health:resource:check"
    # Looked up once for both the success and the error reply.
    request_id = getattr(request.state, 'request_id', None)

    try:
        # Mock health check result
        health_result = {
            "resource_id": health_req.resource_id,
            "status": "healthy",
            "latency_ms": 87,
            "last_successful_request": datetime.utcnow().isoformat(),
            "error_count_24h": 3,
            "success_rate_24h": 99.97,
            "details": {
                "endpoint_reachable": True,
                "authentication_valid": True,
                "rate_limit_ok": True,
                "response_time_acceptable": True,
            },
        }

        if health_req.deep_check:
            health_result["deep_check_results"] = {
                "model_loaded": True,
                "memory_usage_mb": 2048,
                "inference_test_passed": True,
                "test_latency_ms": 145,
            }

        return format_response(
            data=health_result,
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        # Keep the traceback in the log; the response body stays generic.
        logger.exception("Failed to check resource health: %s", e)
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
|
|
|
|
|
@router.post("/rag/process-document")
async def process_document(
    request: Request,
    rag_req: RAGProcessRequest,
    background_tasks: BackgroundTasks
):
    """
    Process document for RAG pipeline

    CB-REST Capability Required: rag:document:process

    Generates a fresh UUID as the processing id, registers
    ``process_document_async`` on ``background_tasks`` with that id and the
    request, and replies immediately with status "processing", three
    placeholder chunk previews and an estimated completion time 30 seconds
    from now (UTC).

    On any exception the error is logged with its traceback and a
    ``format_error`` reply with ``ErrorCode.SYSTEM_ERROR`` is returned.
    """
    capability = "rag:document:process"
    # Looked up once for both the success and the error reply.
    request_id = getattr(request.state, 'request_id', None)

    try:
        processing_id = str(uuid.uuid4())

        # Start async processing
        background_tasks.add_task(process_document_async, processing_id, rag_req)

        # Placeholder previews: the text is synthetic, only "size" reflects
        # the request (its chunk_size).
        chunk_preview = [
            {
                "chunk_id": f"chunk_{i}",
                "text": f"Sample chunk {i} from document...",
                "metadata": {"position": i, "size": rag_req.chunk_size},
            }
            for i in range(3)
        ]
        estimated_completion = datetime.utcnow() + timedelta(seconds=30)

        return format_response(
            data={
                "processing_id": processing_id,
                "status": "processing",
                "chunk_preview": chunk_preview,
                "estimated_completion": estimated_completion.isoformat(),
            },
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        # Keep the traceback in the log; the response body stays generic.
        logger.exception("Failed to process document: %s", e)
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
|
|
|
|
|
@router.post("/rag/semantic-search")
async def semantic_search(
    request: Request,
    search_req: SemanticSearchRequest
):
    """
    Perform semantic search in vector database

    CB-REST Capability Required: rag:search:execute

    Results are currently mocked: at most five synthetic hits (fewer when
    ``search_req.top_k`` is smaller) with relevance scores descending from
    0.95 in steps of 0.05, plus fixed search metadata echoing
    ``search_req.collection_id``.

    NOTE(review): ``search_req.relevance_threshold`` and
    ``search_req.filters`` are not applied to the mock results - confirm
    whether the real implementation is expected to honour them.

    On any exception the error is logged with its traceback and a
    ``format_error`` reply with ``ErrorCode.SYSTEM_ERROR`` is returned.
    """
    capability = "rag:search:execute"
    # Looked up once for both the success and the error reply.
    request_id = getattr(request.state, 'request_id', None)

    try:
        # Mock search results
        results = [
            {
                "document_id": f"doc_{i}",
                "chunk_id": f"chunk_{i}",
                "text": f"Relevant text snippet {i} matching query: {search_req.query[:50]}...",
                # Rounded so binary float error (e.g. 0.8999999999999999)
                # does not leak into the response.
                "relevance_score": round(0.95 - (i * 0.05), 2),
                "metadata": {
                    "source": f"document_{i}.pdf",
                    "page": i + 1,
                    "timestamp": datetime.utcnow().isoformat(),
                },
            }
            for i in range(min(search_req.top_k, 5))
        ]

        return format_response(
            data={
                "results": results,
                "query_embedding": [0.1] * 10,  # Truncated for brevity
                "search_metadata": {
                    "collection_id": search_req.collection_id,
                    "documents_searched": 1500,
                    "search_time_ms": 145,
                    "model_used": "text-embedding-3-small",
                },
            },
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        # Keep the traceback in the log; the response body stays generic.
        logger.exception("Failed to perform semantic search: %s", e)
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
|
|
|
|
|
@router.post("/agents/execute")
async def execute_agent(
    request: Request,
    agent_req: AgentExecutionRequest,
    background_tasks: BackgroundTasks
):
    """
    Execute an agentic workflow

    CB-REST Capability Required: agent:*:execute

    Generates a fresh UUID as the execution id, registers
    ``execute_agent_async`` on ``background_tasks`` with that id and the
    request, and replies immediately with status "queued", an estimated
    duration of half the requested timeout (integer division) and a fixed
    resource allocation.

    On any exception the error is logged with its traceback and a
    ``format_error`` reply with ``ErrorCode.SYSTEM_ERROR`` is returned.
    """
    capability = "agent:*:execute"
    # Looked up once for both the success and the error reply.
    request_id = getattr(request.state, 'request_id', None)

    try:
        execution_id = str(uuid.uuid4())

        # Start async agent execution
        background_tasks.add_task(execute_agent_async, execution_id, agent_req)

        return format_response(
            data={
                "execution_id": execution_id,
                "status": "queued",
                "estimated_duration": agent_req.timeout // 2,
                "resource_allocation": {
                    "cpu_cores": 2,
                    "memory_mb": 4096,
                    "gpu_allocation": 0.25,
                },
            },
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        # Keep the traceback in the log; the response body stays generic.
        logger.exception("Failed to execute agent: %s", e)
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
|
|
|
|
|
@router.get("/agents/{execution_id}/status")
async def get_agent_status(
    request: Request,
    execution_id: str
):
    """
    Get agent execution status

    CB-REST Capability Required: agent:{execution_id}:status

    The status is currently mocked: a fixed "running" snapshot is returned
    with ``execution_id`` echoed back; no lookup of a real execution is
    performed here.

    On any exception the error is logged with its traceback and a
    ``format_error`` reply with ``ErrorCode.SYSTEM_ERROR`` is returned.
    """
    # The capability string embeds the path parameter.
    capability = f"agent:{execution_id}:status"
    # Looked up once for both the success and the error reply.
    request_id = getattr(request.state, 'request_id', None)

    try:
        # Mock status
        status = {
            "execution_id": execution_id,
            "status": "running",
            "progress_percent": 65,
            "current_task": {
                "name": "data_analysis",
                "status": "in_progress",
                "started_at": datetime.utcnow().isoformat(),
            },
            "memory_usage": {
                "working_memory_mb": 512,
                "context_size": 8192,
                "tool_calls_made": 12,
            },
            "performance_metrics": {
                "steps_completed": 8,
                "total_steps": 12,
                "average_step_time_ms": 2500,
                "errors_encountered": 0,
            },
        }

        return format_response(
            data=status,
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        # Keep the traceback in the log; the response body stays generic.
        logger.exception("Failed to get agent status: %s", e)
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
|
|
|
|
|
@router.post("/usage/record")
async def record_usage(
    request: Request,
    operation_type: str,
    resource_id: str,
    usage_metrics: Dict[str, Any]
):
    """
    Record resource usage for billing and analytics

    CB-REST Capability Required: usage:*:write

    The record is currently mocked: a fresh UUID and fixed quota figures are
    returned. A "high_token_usage" warning is attached when
    ``usage_metrics["tokens_used"]`` exceeds 10000; a missing or null value
    counts as 0. ``operation_type`` and ``resource_id`` are accepted but not
    used by this implementation.

    On any exception (including a non-numeric ``tokens_used``) the error is
    logged with its traceback and a ``format_error`` reply with
    ``ErrorCode.SYSTEM_ERROR`` is returned.
    """
    capability = "usage:*:write"
    token_warning_threshold = 10000
    # Looked up once for both the success and the error reply.
    request_id = getattr(request.state, 'request_id', None)

    try:
        usage_record = {
            "record_id": str(uuid.uuid4()),
            "recorded": True,
            "updated_quotas": {
                "tokens_remaining": 950000,
                "requests_remaining": 9500,
                "cost_accumulated_cents": 125,
            },
            "warnings": [],
        }

        # Check for quota warnings.
        # `or 0` also covers an explicit null, which would otherwise make the
        # comparison raise TypeError and turn a valid request into a
        # SYSTEM_ERROR reply.
        tokens_used = usage_metrics.get("tokens_used") or 0
        if tokens_used > token_warning_threshold:
            usage_record["warnings"].append({
                "type": "high_token_usage",
                "message": "High token usage detected",
                "threshold": token_warning_threshold,
                "actual": tokens_used,
            })

        return format_response(
            data=usage_record,
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        # Keep the traceback in the log; the response body stays generic.
        logger.exception("Failed to record usage: %s", e)
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
|
|
|
|
|
# Async helper functions
|
|
async def process_document_async(processing_id: str, rag_req: RAGProcessRequest):
    """Background task for document processing.

    Placeholder implementation: waits 30 seconds without blocking the event
    loop, then logs completion for ``processing_id``. ``rag_req`` is accepted
    for the eventual real implementation and is not read yet.
    """
    # Imported here because this module's top-level imports do not include
    # asyncio; without it the sleep below fails with NameError.
    import asyncio

    # Implement actual document processing logic here
    await asyncio.sleep(30)  # Simulate processing
    logger.info("Document processing completed: %s", processing_id)
|
|
|
|
|
|
async def execute_agent_async(execution_id: str, agent_req: AgentExecutionRequest):
    """Background task for agent execution.

    Placeholder implementation: waits for half of ``agent_req.timeout``
    (integer division, seconds) without blocking the event loop, then logs
    completion for ``execution_id``.
    """
    # Imported here because this module's top-level imports do not include
    # asyncio; without it the sleep below fails with NameError.
    import asyncio

    # Implement actual agent execution logic here
    await asyncio.sleep(agent_req.timeout // 2)  # Simulate execution
    logger.info("Agent execution completed: %s", execution_id)