GT AI OS Community v2.0.33 - Add NVIDIA NIM and Nemotron agents
- Updated python_coding_microproject.csv to use NVIDIA NIM Kimi K2 - Updated kali_linux_shell_simulator.csv to use NVIDIA NIM Kimi K2 - Made more general-purpose (flexible targets, expanded tools) - Added nemotron-mini-agent.csv for fast local inference via Ollama - Added nemotron-agent.csv for advanced reasoning via Ollama - Added wiki page: Projects for NVIDIA NIMs and Nemotron
This commit is contained in:
404
apps/resource-cluster/app/api/v1/resources_cbrest.py
Normal file
404
apps/resource-cluster/app/api/v1/resources_cbrest.py
Normal file
@@ -0,0 +1,404 @@
|
||||
"""
|
||||
GT 2.0 Resource Cluster - Resource Management API with CB-REST Standards
|
||||
|
||||
This module handles non-AI endpoints using CB-REST standard.
|
||||
AI inference endpoints maintain OpenAI compatibility.
|
||||
"""
|
||||
import asyncio
import logging
import uuid
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional

from fastapi import APIRouter, BackgroundTasks, Depends, Query, Request
from pydantic import BaseModel, Field

from app.core.api_standards import (
    APIError,
    ErrorCode,
    format_error,
    format_response,
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/resources", tags=["Resource Management"])
|
||||
|
||||
|
||||
# Request/Response Models
|
||||
class HealthCheckRequest(BaseModel):
    """Request body for POST /resources/health/check."""

    # Identifier of the resource to probe.
    resource_id: str = Field(..., description="Resource identifier")
    # When True, the handler adds model/inference-level diagnostics to the result.
    deep_check: bool = Field(False, description="Perform deep health check")
|
||||
|
||||
|
||||
class RAGProcessRequest(BaseModel):
    """Request body for POST /resources/rag/process-document."""

    # Raw document text to be chunked and embedded.
    document_content: str = Field(..., description="Document content to process")
    # Chunking strategy name; default "semantic" (valid values not enforced here).
    chunking_strategy: str = Field("semantic", description="Chunking strategy")
    # Target chunk size, bounded 100..10000 (units presumably characters/tokens — confirm with pipeline).
    chunk_size: int = Field(1000, ge=100, le=10000)
    # Overlap between consecutive chunks, bounded 0..500.
    chunk_overlap: int = Field(100, ge=0, le=500)
    # Embedding model identifier used downstream.
    embedding_model: str = Field("text-embedding-3-small")
|
||||
|
||||
|
||||
class SemanticSearchRequest(BaseModel):
    """Request body for POST /resources/rag/semantic-search."""

    # Natural-language search query.
    query: str = Field(..., description="Search query")
    # Vector collection to search within.
    collection_id: str = Field(..., description="Vector collection ID")
    # Maximum number of hits to return, bounded 1..100.
    top_k: int = Field(10, ge=1, le=100)
    # Minimum relevance score for a hit to be included, 0.0..1.0.
    relevance_threshold: float = Field(0.7, ge=0.0, le=1.0)
    # Optional metadata filters applied to the search (schema defined by the vector store).
    filters: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class AgentExecutionRequest(BaseModel):
    """Request body for POST /resources/agents/execute."""

    # Agent type selector (valid values not enforced here).
    agent_type: str = Field(..., description="Agent type")
    # Free-form task configuration passed to the agent.
    task: Dict[str, Any] = Field(..., description="Task configuration")
    # Execution timeout in seconds, bounded 10..3600.
    timeout: int = Field(300, ge=10, le=3600, description="Timeout in seconds")
    # Optional extra context for the execution (schema defined by the agent runtime).
    execution_context: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
@router.get("/health/system")
async def system_health(request: Request):
    """
    Get overall system health status

    CB-REST Capability Required: health:system:read
    """
    # Request ID propagated by middleware, if any.
    request_id = getattr(request.state, 'request_id', None)
    capability = "health:system:read"
    try:
        # Mock per-service statuses (static placeholder values).
        service_statuses = [
            {"service": "ai_inference", "status": "healthy", "latency_ms": 45},
            {"service": "rag_processing", "status": "healthy", "latency_ms": 120},
            {"service": "vector_storage", "status": "healthy", "latency_ms": 30},
            {"service": "agent_orchestration", "status": "healthy", "latency_ms": 85},
        ]
        resource_utilization = {
            "cpu_percent": 42.5,
            "memory_percent": 68.3,
            "gpu_percent": 35.0,
            "disk_percent": 55.2,
        }
        performance_metrics = {
            "requests_per_second": 145,
            "average_latency_ms": 95,
            "error_rate_percent": 0.02,
            "active_connections": 234,
        }
        health_status = {
            "overall_health": "healthy",
            "service_statuses": service_statuses,
            "resource_utilization": resource_utilization,
            "performance_metrics": performance_metrics,
            "timestamp": datetime.utcnow().isoformat(),
        }
        return format_response(
            data=health_status,
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        logger.error(f"Failed to get system health: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
||||
|
||||
|
||||
@router.post("/health/check")
async def check_resource_health(
    request: Request,
    health_req: HealthCheckRequest,
    background_tasks: BackgroundTasks
):
    """
    Perform health check on a specific resource

    CB-REST Capability Required: health:resource:check
    """
    request_id = getattr(request.state, 'request_id', None)
    capability = "health:resource:check"
    try:
        # Mock health check result for the requested resource.
        details = {
            "endpoint_reachable": True,
            "authentication_valid": True,
            "rate_limit_ok": True,
            "response_time_acceptable": True,
        }
        health_result = {
            "resource_id": health_req.resource_id,
            "status": "healthy",
            "latency_ms": 87,
            "last_successful_request": datetime.utcnow().isoformat(),
            "error_count_24h": 3,
            "success_rate_24h": 99.97,
            "details": details,
        }

        # Deep checks add model/runtime-level diagnostics on request.
        if health_req.deep_check:
            deep_results = {
                "model_loaded": True,
                "memory_usage_mb": 2048,
                "inference_test_passed": True,
                "test_latency_ms": 145,
            }
            health_result["deep_check_results"] = deep_results

        return format_response(
            data=health_result,
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        logger.error(f"Failed to check resource health: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
||||
|
||||
|
||||
@router.post("/rag/process-document")
async def process_document(
    request: Request,
    rag_req: RAGProcessRequest,
    background_tasks: BackgroundTasks
):
    """
    Process document for RAG pipeline

    CB-REST Capability Required: rag:document:process
    """
    request_id = getattr(request.state, 'request_id', None)
    capability = "rag:document:process"
    try:
        processing_id = str(uuid.uuid4())

        # Hand the heavy lifting to a background task; respond immediately.
        background_tasks.add_task(
            process_document_async,
            processing_id,
            rag_req,
        )

        # Mock preview of the first few chunks.
        chunk_preview = []
        for i in range(3):
            chunk_preview.append({
                "chunk_id": f"chunk_{i}",
                "text": f"Sample chunk {i} from document...",
                "metadata": {"position": i, "size": rag_req.chunk_size},
            })

        payload = {
            "processing_id": processing_id,
            "status": "processing",
            "chunk_preview": chunk_preview,
            "estimated_completion": (datetime.utcnow() + timedelta(seconds=30)).isoformat(),
        }
        return format_response(
            data=payload,
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        logger.error(f"Failed to process document: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
||||
|
||||
|
||||
@router.post("/rag/semantic-search")
async def semantic_search(
    request: Request,
    search_req: SemanticSearchRequest
):
    """
    Perform semantic search in vector database

    CB-REST Capability Required: rag:search:execute
    """
    request_id = getattr(request.state, 'request_id', None)
    capability = "rag:search:execute"
    try:
        # Mock hits: at most 5, with linearly decaying relevance scores.
        hit_count = min(search_req.top_k, 5)
        results = []
        for i in range(hit_count):
            results.append({
                "document_id": f"doc_{i}",
                "chunk_id": f"chunk_{i}",
                "text": f"Relevant text snippet {i} matching query: {search_req.query[:50]}...",
                "relevance_score": 0.95 - (i * 0.05),
                "metadata": {
                    "source": f"document_{i}.pdf",
                    "page": i + 1,
                    "timestamp": datetime.utcnow().isoformat(),
                },
            })

        payload = {
            "results": results,
            "query_embedding": [0.1] * 10,  # Truncated for brevity
            "search_metadata": {
                "collection_id": search_req.collection_id,
                "documents_searched": 1500,
                "search_time_ms": 145,
                "model_used": "text-embedding-3-small",
            },
        }
        return format_response(
            data=payload,
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        logger.error(f"Failed to perform semantic search: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
||||
|
||||
|
||||
@router.post("/agents/execute")
async def execute_agent(
    request: Request,
    agent_req: AgentExecutionRequest,
    background_tasks: BackgroundTasks
):
    """
    Execute an agentic workflow

    CB-REST Capability Required: agent:*:execute
    """
    request_id = getattr(request.state, 'request_id', None)
    capability = "agent:*:execute"
    try:
        execution_id = str(uuid.uuid4())

        # Queue the agent run asynchronously; the caller polls for status.
        background_tasks.add_task(
            execute_agent_async,
            execution_id,
            agent_req,
        )

        allocation = {
            "cpu_cores": 2,
            "memory_mb": 4096,
            "gpu_allocation": 0.25,
        }
        payload = {
            "execution_id": execution_id,
            "status": "queued",
            # Mock estimate: half the configured timeout.
            "estimated_duration": agent_req.timeout // 2,
            "resource_allocation": allocation,
        }
        return format_response(
            data=payload,
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        logger.error(f"Failed to execute agent: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
||||
|
||||
|
||||
@router.get("/agents/{execution_id}/status")
async def get_agent_status(
    request: Request,
    execution_id: str
):
    """
    Get agent execution status

    CB-REST Capability Required: agent:{execution_id}:status
    """
    request_id = getattr(request.state, 'request_id', None)
    # Capability is scoped to the specific execution being queried.
    capability = f"agent:{execution_id}:status"
    try:
        # Mock status snapshot for the requested execution.
        current_task = {
            "name": "data_analysis",
            "status": "in_progress",
            "started_at": datetime.utcnow().isoformat(),
        }
        memory_usage = {
            "working_memory_mb": 512,
            "context_size": 8192,
            "tool_calls_made": 12,
        }
        performance_metrics = {
            "steps_completed": 8,
            "total_steps": 12,
            "average_step_time_ms": 2500,
            "errors_encountered": 0,
        }
        status = {
            "execution_id": execution_id,
            "status": "running",
            "progress_percent": 65,
            "current_task": current_task,
            "memory_usage": memory_usage,
            "performance_metrics": performance_metrics,
        }
        return format_response(
            data=status,
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        logger.error(f"Failed to get agent status: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
||||
|
||||
|
||||
@router.post("/usage/record")
async def record_usage(
    request: Request,
    operation_type: str,
    resource_id: str,
    usage_metrics: Dict[str, Any]
):
    """
    Record resource usage for billing and analytics

    CB-REST Capability Required: usage:*:write
    """
    request_id = getattr(request.state, 'request_id', None)
    capability = "usage:*:write"
    try:
        # Mock accounting record with static post-update quota values.
        quotas = {
            "tokens_remaining": 950000,
            "requests_remaining": 9500,
            "cost_accumulated_cents": 125,
        }
        usage_record = {
            "record_id": str(uuid.uuid4()),
            "recorded": True,
            "updated_quotas": quotas,
            "warnings": [],
        }

        # Flag unusually large token consumption for a single operation.
        tokens_used = usage_metrics.get("tokens_used", 0)
        if tokens_used > 10000:
            warning = {
                "type": "high_token_usage",
                "message": "High token usage detected",
                "threshold": 10000,
                "actual": tokens_used,
            }
            usage_record["warnings"].append(warning)

        return format_response(
            data=usage_record,
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        logger.error(f"Failed to record usage: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
|
||||
|
||||
|
||||
# Async helper functions
|
||||
async def process_document_async(processing_id: str, rag_req: RAGProcessRequest):
|
||||
"""Background task for document processing"""
|
||||
# Implement actual document processing logic here
|
||||
await asyncio.sleep(30) # Simulate processing
|
||||
logger.info(f"Document processing completed: {processing_id}")
|
||||
|
||||
|
||||
async def execute_agent_async(execution_id: str, agent_req: AgentExecutionRequest) -> None:
    """Background task for agent execution.

    Args:
        execution_id: Correlation ID returned to the client by /agents/execute.
        agent_req: Original execution request; only ``timeout`` is read by the mock.
    """
    # BUG FIX: the module never imported asyncio, so this task raised
    # NameError at runtime; import locally so the fix is self-contained.
    import asyncio

    # TODO: implement actual agent execution logic.
    await asyncio.sleep(agent_req.timeout // 2)  # Simulate execution
    logger.info(f"Agent execution completed: {execution_id}")
|
||||
Reference in New Issue
Block a user